{"id":"https://openalex.org/W7155063517","doi":"https://doi.org/10.48550/arxiv.2604.17407","title":"Think before Go: Hierarchical Reasoning for Image-goal Navigation","display_name":"Think before Go: Hierarchical Reasoning for Image-goal Navigation","publication_year":2026,"publication_date":"2026-04-19","ids":{"openalex":"https://openalex.org/W7155063517","doi":"https://doi.org/10.48550/arxiv.2604.17407"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2604.17407","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.17407","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2604.17407","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5048567115","display_name":"Pengna Li","orcid":"https://orcid.org/0000-0002-8477-8340"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Li, Pengna","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5034105691","display_name":"Kangyi Wu","orcid":"https://orcid.org/0000-0001-7382-4949"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wu, Kangyi","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5134115879","display_name":"Shaoqing Xu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Xu, Shaoqing","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5134104070","display_name":"Fang Li","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Li, Fang","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5134136966","display_name":"Lin Zhao","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhao, Lin","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5134174440","display_name":"Long Chen","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Chen, Long","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5134133757","display_name":"Zhi-Xin Yang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yang, Zhi-Xin","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5134139950","display_name":"Nanning Zheng","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zheng, Nanning","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":8,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.8873999714851379,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.8873999714851379,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.03550000116229057,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11605","display_name":"Visual Attention and Saliency Detection","score":0.012500000186264515,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.7067000269889832},{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.516700029373169},{"id":"https://openalex.org/keywords/cognitive-map","display_name":"Cognitive map","score":0.39590001106262207},{"id":"https://openalex.org/keywords/hierarchical-database-model","display_name":"Hierarchical database model","score":0.3952000141143799},{"id":"https://openalex.org/keywords/cognition","display_name":"Cognition","score":0.3707999885082245},{"id":"https://openalex.org/keywords/task-analysis","display_name":"Task analysis","score":0.3589000105857849},{"id":"https://openalex.org/keywords/image","display_name":"Image (mathematics)","score":0.3264999985694885}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7573000192642212},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.7067000269889832},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6518999934196472},{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.516700029373169},{"id":"https://openalex.org/C170494330","wikidata":"https://www.wikidata.org/wiki/Q1778434","display_name":"Cognitive map","level":3,"score":0.39590001106262207},{"id":"https://openalex.org/C144986985","wikidata":"https://www.wikidata.org/wiki/Q871236","display_name":"Hierarchical database model","level":2,"score":0.3952000141143799},{"id":"https://openalex.org/C169900460","wikidata":"https://www.wikidata.org/wiki/Q2200417","display_name":"Cognition","level":2,"score":0.3707999885082245},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.3628000020980835},{"id":"https://openalex.org/C175154964","wikidata":"https://www.wikidata.org/wiki/Q380077","display_name":"Task analysis","level":3,"score":0.3589000105857849},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.3292999863624573},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.3264999985694885},{"id":"https://openalex.org/C107457646","wikidata":"https://www.wikidata.org/wiki/Q207434","display_name":"Human\u2013computer interaction","level":1,"score":0.27889999747276306},{"id":"https://openalex.org/C20162079","wikidata":"https://www.wikidata.org/wiki/Q1151406","display_name":"Case-based reasoning","level":2,"score":0.2773999869823456},{"id":"https://openalex.org/C193611912","wikidata":"https://www.wikidata.org/wiki/Q4677596","display_name":"Active vision","level":2,"score":0.2754000127315521},{"id":"https://openalex.org/C2780791683","wikidata":"https://www.wikidata.org/wiki/Q846785","display_name":"Action (physics)","level":2,"score":0.2685000002384186},{"id":"https://openalex.org/C40506919","wikidata":"https://www.wikidata.org/wiki/Q7452469","display_name":"Sequence learning","level":2,"score":0.2669999897480011},{"id":"https://openalex.org/C2777891301","wikidata":"https://www.wikidata.org/wiki/Q3475123","display_name":"Navigation system","level":2,"score":0.26660001277923584},{"id":"https://openalex.org/C66024118","wikidata":"https://www.wikidata.org/wiki/Q1122506","display_name":"Computational model","level":2,"score":0.25949999690055847},{"id":"https://openalex.org/C124527596","wikidata":"https://www.wikidata.org/wiki/Q17029359","display_name":"Hierarchical control system","level":3,"score":0.2526000142097473},{"id":"https://openalex.org/C2989549987","wikidata":"https://www.wikidata.org/wiki/Q350882","display_name":"Route planning","level":2,"score":0.2517000138759613}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2604.17407","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.17407","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2604.17407","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.17407","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Image-goal":[0],"navigation":[1,26,95],"steers":[2],"an":[3,11,24,152],"agent":[4,63,123],"to":[5,56,64,114,145,159,176],"a":[6,90,105,111,116,170,186],"target":[7,33,45],"location":[8],"specified":[9],"by":[10,22,68],"image":[12],"in":[13,50,81,194],"unseen":[14],"environments.":[15],"Existing":[16],"methods":[17,54],"primarily":[18],"handle":[19],"this":[20],"task":[21],"learning":[23,155],"end-to-end":[25],"policy,":[27],"which":[28],"compares":[29],"the":[30,40,44,62,69,122,127,131,135,138,146,164,179,201],"similarities":[31],"of":[32,137,203],"and":[34,37,99,197],"observation":[35],"images":[36],"directly":[38],"predicts":[39],"actions.":[41],"However,":[42],"when":[43],"is":[46,108,157],"distant":[47],"or":[48,129],"lies":[49],"another":[51],"room,":[52],"such":[53,119],"fail":[55],"extract":[57],"informative":[58],"visual":[59],"cues,":[60],"leading":[61],"wander":[65],"around.":[66],"Motivated":[67],"human":[70],"cognitive":[71],"principle":[72],"that":[73,92],"deliberate,":[74],"high-level":[75,97,103],"reasoning":[76],"guides":[77],"fast,":[78],"reactive":[79],"execution":[80,147],"complex":[82],"tasks,":[83],"we":[84],"propose":[85],"Hierarchical":[86],"Reasoning":[87],"Navigation":[88],"(HRNav),":[89],"framework":[91,188],"decomposes":[93],"image-goal":[94],"into":[96],"planning":[98],"low-level":[100,150],"execution.":[101],"In":[102,149],"planning,":[104],"vision-language":[106],"model":[107],"trained":[109],"on":[110,163],"self-collected":[112],"dataset":[113],"generate":[115],"short-horizon":[117,165],"plan,":[118],"as":[120],"whether":[121],"should":[124],"walk":[125],"through":[126],"door":[128],"down":[130],"hallway.":[132],"This":[133],"downgrades":[134],"difficulty":[136],"long-horizon":[139],"task,":[140],"making":[141],"it":[142],"more":[143],"amenable":[144],"part.":[148],"execution,":[151],"online":[153],"reinforcement":[154],"policy":[156],"utilized":[158],"decide":[160],"actions":[161],"conditioned":[162],"plan.":[166],"We":[167],"also":[168],"devise":[169],"novel":[171],"Wandering":[172],"Suppression":[173],"Penalty":[174],"(WSP)":[175],"further":[177],"reduce":[178],"wandering":[180],"problem.":[181],"Together,":[182],"these":[183],"components":[184],"form":[185],"hierarchical":[187],"for":[189],"Image-Goal":[190],"Navigation.":[191],"Extensive":[192],"experiments":[193],"both":[195],"simulation":[196],"real-world":[198],"environments":[199],"demonstrate":[200],"superiority":[202],"our":[204],"method.":[205]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2026-04-22T00:00:00"}
