{"id":"https://openalex.org/W4416748396","doi":"https://doi.org/10.1109/iros60139.2025.11247702","title":"VecNav: Vector Goal Robot Navigation from In-the-wild Videos","display_name":"VecNav: Vector Goal Robot Navigation from In-the-wild Videos","publication_year":2025,"publication_date":"2025-10-19","ids":{"openalex":"https://openalex.org/W4416748396","doi":"https://doi.org/10.1109/iros60139.2025.11247702"},"language":null,"primary_location":{"id":"doi:10.1109/iros60139.2025.11247702","is_oa":false,"landing_page_url":"https://doi.org/10.1109/iros60139.2025.11247702","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 IEEE/RSJ International Conference on Intelligent Robots and Systems (IROS)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5016013285","display_name":"Ruixiang Cao","orcid":"https://orcid.org/0000-0003-4401-2700"},"institutions":[{"id":"https://openalex.org/I22299242","display_name":"Kyoto University","ror":"https://ror.org/02kpeqv85","country_code":"JP","type":"education","lineage":["https://openalex.org/I22299242"]},{"id":"https://openalex.org/I39012071","display_name":"Kyoto College of Graduate Studies for Informatics","ror":"https://ror.org/05mzj8a56","country_code":"JP","type":"education","lineage":["https://openalex.org/I39012071"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Ruixiang Cao","raw_affiliation_strings":["Kyoto University,Learning Machines Group, Graduate School of Informatics,Kyoto,Japan"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Kyoto University,Learning Machines Group, Graduate School of Informatics,Kyoto,Japan","institution_ids":["https://openalex.org/I22299242","https://openalex.org/I39012071"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5027216065","display_name":"Satoshi Yagi","orcid":"https://orcid.org/0000-0002-9572-089X"},"institutions":[{"id":"https://openalex.org/I22299242","display_name":"Kyoto University","ror":"https://ror.org/02kpeqv85","country_code":"JP","type":"education","lineage":["https://openalex.org/I22299242"]},{"id":"https://openalex.org/I39012071","display_name":"Kyoto College of Graduate Studies for Informatics","ror":"https://ror.org/05mzj8a56","country_code":"JP","type":"education","lineage":["https://openalex.org/I39012071"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Satoshi Yagi","raw_affiliation_strings":["Kyoto University,Learning Machines Group, Graduate School of Informatics,Kyoto,Japan"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Kyoto University,Learning Machines Group, Graduate School of Informatics,Kyoto,Japan","institution_ids":["https://openalex.org/I22299242","https://openalex.org/I39012071"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5015376821","display_name":"Satoshi Yamamori","orcid":"https://orcid.org/0009-0001-0712-268X"},"institutions":[{"id":"https://openalex.org/I4210104143","display_name":"Advanced Telecommunications Research Institute International","ror":"https://ror.org/01pe1d703","country_code":"JP","type":"facility","lineage":["https://openalex.org/I4210104143"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Satoshi Yamamori","raw_affiliation_strings":["ATR,Computational Neuroscience Labs,Department of Brain Robot Interface,Kyoto,Japan"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"ATR,Computational Neuroscience Labs,Department of Brain Robot Interface,Kyoto,Japan","institution_ids":["https://openalex.org/I4210104143"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5025458882","display_name":"Jun Morimoto","orcid":"https://orcid.org/0000-0002-4115-1919"},"institutions":[{"id":"https://openalex.org/I22299242","display_name":"Kyoto University","ror":"https://ror.org/02kpeqv85","country_code":"JP","type":"education","lineage":["https://openalex.org/I22299242"]},{"id":"https://openalex.org/I39012071","display_name":"Kyoto College of Graduate Studies for Informatics","ror":"https://ror.org/05mzj8a56","country_code":"JP","type":"education","lineage":["https://openalex.org/I39012071"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Jun Morimoto","raw_affiliation_strings":["Kyoto University,Learning Machines Group, Graduate School of Informatics,Kyoto,Japan"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Kyoto University,Learning Machines Group, Graduate School of Informatics,Kyoto,Japan","institution_ids":["https://openalex.org/I22299242","https://openalex.org/I39012071"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.40053271,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"17207","last_page":"17214"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10653","display_name":"Robot Manipulation and Learning","score":0.2517000138759613,"subfield":{"id":"https://openalex.org/subfields/2207","display_name":"Control and Systems Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10653","display_name":"Robot Manipulation and Learning","score":0.2517000138759613,"subfield":{"id":"https://openalex.org/subfields/2207","display_name":"Control and Systems Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10709","display_name":"Social Robot Interaction and HRI","score":0.13169999420642853,"subfield":{"id":"https://openalex.org/subfields/3207","display_name":"Social Psychology"},"field":{"id":"https://openalex.org/fields/32","display_name":"Psychology"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.1266999989748001,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/intrinsics","display_name":"Intrinsics","score":0.7301999926567078},{"id":"https://openalex.org/keywords/monocular","display_name":"Monocular","score":0.6852999925613403},{"id":"https://openalex.org/keywords/robot","display_name":"Robot","score":0.5217000246047974},{"id":"https://openalex.org/keywords/rgb-color-model","display_name":"RGB color model","score":0.46320000290870667},{"id":"https://openalex.org/keywords/ground-truth","display_name":"Ground truth","score":0.353300005197525},{"id":"https://openalex.org/keywords/visual-odometry","display_name":"Visual odometry","score":0.34619998931884766},{"id":"https://openalex.org/keywords/robotics","display_name":"Robotics","score":0.34049999713897705}],"concepts":[{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.7753999829292297},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.7678999900817871},{"id":"https://openalex.org/C2908650547","wikidata":"https://www.wikidata.org/wiki/Q20999234","display_name":"Intrinsics","level":2,"score":0.7301999926567078},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.696399986743927},{"id":"https://openalex.org/C65909025","wikidata":"https://www.wikidata.org/wiki/Q1945033","display_name":"Monocular","level":2,"score":0.6852999925613403},{"id":"https://openalex.org/C90509273","wikidata":"https://www.wikidata.org/wiki/Q11012","display_name":"Robot","level":2,"score":0.5217000246047974},{"id":"https://openalex.org/C82990744","wikidata":"https://www.wikidata.org/wiki/Q166194","display_name":"RGB color model","level":2,"score":0.46320000290870667},{"id":"https://openalex.org/C146849305","wikidata":"https://www.wikidata.org/wiki/Q370766","display_name":"Ground truth","level":2,"score":0.353300005197525},{"id":"https://openalex.org/C5799516","wikidata":"https://www.wikidata.org/wiki/Q4110915","display_name":"Visual odometry","level":3,"score":0.34619998931884766},{"id":"https://openalex.org/C34413123","wikidata":"https://www.wikidata.org/wiki/Q170978","display_name":"Robotics","level":3,"score":0.34049999713897705},{"id":"https://openalex.org/C93226319","wikidata":"https://www.wikidata.org/wiki/Q193137","display_name":"Differential (mechanical device)","level":2,"score":0.33239999413490295},{"id":"https://openalex.org/C2780791683","wikidata":"https://www.wikidata.org/wiki/Q846785","display_name":"Action (physics)","level":2,"score":0.3057999908924103},{"id":"https://openalex.org/C19966478","wikidata":"https://www.wikidata.org/wiki/Q4810574","display_name":"Mobile robot","level":3,"score":0.28940001130104065},{"id":"https://openalex.org/C26990112","wikidata":"https://www.wikidata.org/wiki/Q6887224","display_name":"Mobile robot navigation","level":5,"score":0.2782999873161316},{"id":"https://openalex.org/C178253425","wikidata":"https://www.wikidata.org/wiki/Q162668","display_name":"Visual perception","level":3,"score":0.2685000002384186},{"id":"https://openalex.org/C13662910","wikidata":"https://www.wikidata.org/wiki/Q193139","display_name":"Trajectory","level":2,"score":0.2513999938964844}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/iros60139.2025.11247702","is_oa":false,"landing_page_url":"https://doi.org/10.1109/iros60139.2025.11247702","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 IEEE/RSJ International Conference on Intelligent Robots and Systems (IROS)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":17,"referenced_works":["https://openalex.org/W1612997784","https://openalex.org/W2108598243","https://openalex.org/W2535547924","https://openalex.org/W2963800628","https://openalex.org/W3009928773","https://openalex.org/W3043971245","https://openalex.org/W3175995235","https://openalex.org/W4236325002","https://openalex.org/W4283784462","https://openalex.org/W4285102400","https://openalex.org/W4385245566","https://openalex.org/W4385403811","https://openalex.org/W4385430674","https://openalex.org/W4385430679","https://openalex.org/W4390871913","https://openalex.org/W4390871919","https://openalex.org/W4390874575"],"related_works":[],"abstract_inverted_index":{"We":[0,62,124],"propose":[1],"VecNav,":[2],"a":[3,8,65,71,75,89,110,129],"novel":[4],"approach":[5,49],"that":[6,69,148],"trains":[7],"monocular":[9,59,99],"navigation":[10,101,112],"model":[11,113],"through":[12],"self-supervision":[13],"using":[14,58,139],"uncalibrated,":[15],"human-captured":[16],"videos.":[17],"These":[18],"videos,":[19],"characterized":[20],"by":[21,74],"unknown":[22],"camera":[23],"intrinsics":[24],"and":[25,35,77,82,92],"extrinsics,":[26],"are":[27,36],"readily":[28],"available":[29],"from":[30,55],"video-sharing":[31],"platforms":[32],"(e.g.":[33],"YouTube)":[34],"referred":[37],"to":[38,43,97,108,136,152],"as":[39,80],"\"in-the-wild\"":[40,141,162],"videos":[41,57,142],"due":[42],"their":[44],"unregulated":[45],"capture":[46],"conditions.":[47],"Our":[48,86,145],"involves":[50],"estimating":[51],"ground":[52],"truth":[53],"trajectories":[54],"these":[56],"visual":[60,100,156],"odometry.":[61],"then":[63],"train":[64],"transformer-based":[66],"diffusion":[67],"policy":[68],"takes":[70],"goal":[72],"specified":[73],"vector":[76],"RGB":[78],"images":[79],"input":[81],"generates":[83],"action":[84],"predictions.":[85],"method":[87,127],"leverages":[88],"significantly":[90],"larger":[91],"more":[93],"diverse":[94],"dataset":[95],"compared":[96],"existing":[98],"approaches.":[102],"This":[103],"diversity":[104],"holds":[105],"the":[106],"potential":[107],"develop":[109],"generalist":[111],"capable":[114],"of":[115,119],"guiding":[116],"various":[117],"types":[118],"robots":[120],"in":[121],"unfamiliar":[122],"environments.":[123],"evaluated":[125],"our":[126],"on":[128,155,160],"differential":[130],"drive":[131],"robot,":[132],"demonstrating":[133],"its":[134],"capability":[135],"effectively":[137],"navigate":[138],"solely":[140,159],"for":[143],"training.":[144],"experiments":[146],"demonstrate":[147],"VecNav":[149],"successfully":[150],"learned":[151],"act":[153],"based":[154],"affordances,":[157],"relying":[158],"uncalibrated":[161],"data.":[163]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2025-11-28T00:00:00"}
