{"id":"https://openalex.org/W4417183158","doi":"https://doi.org/10.1109/3dv69130.2026.00067","title":"What Does Really Matter in Image Goal Navigation?","display_name":"What Does Really Matter in Image Goal Navigation?","publication_year":2026,"publication_date":"2026-03-20","ids":{"openalex":"https://openalex.org/W4417183158","doi":"https://doi.org/10.1109/3dv69130.2026.00067"},"language":"en","primary_location":{"id":"doi:10.1109/3dv69130.2026.00067","is_oa":false,"landing_page_url":"https://doi.org/10.1109/3dv69130.2026.00067","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2026 International Conference on 3D Vision (3DV)","raw_type":"proceedings-article"},"type":"article","indexed_in":["arxiv","crossref","datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/2507.01667","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5077875525","display_name":"Gianluca Monaci","orcid":"https://orcid.org/0000-0001-5514-8457"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Gianluca Monaci","raw_affiliation_strings":["NAVER LABS Europe,Meylan,France"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"NAVER LABS Europe,Meylan,France","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5054497181","display_name":"Philippe Weinzaepfel","orcid":"https://orcid.org/0000-0002-4223-3983"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Philippe Weinzaepfel","raw_affiliation_strings":["NAVER LABS Europe,Meylan,France"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"NAVER LABS Europe,Meylan,France","institution_ids":[]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5101613966","display_name":"Christian Wolf","orcid":"https://orcid.org/0000-0001-9766-3211"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Christian Wolf","raw_affiliation_strings":["NAVER LABS Europe,Meylan,France"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"NAVER LABS Europe,Meylan,France","institution_ids":[]}]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":3,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.01349762,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"640","last_page":"651"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.25099998712539673,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.25099998712539673,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10191","display_name":"Robotics and Sensor-Based Localization","score":0.16599999368190765,"subfield":{"id":"https://openalex.org/subfields/2202","display_name":"Aerospace Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10627","display_name":"Advanced Image and Video Retrieval Techniques","score":0.15379999577999115,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.619700014591217},{"id":"https://openalex.org/keywords/pose","display_name":"Pose","score":0.6061999797821045},{"id":"https://openalex.org/keywords/estimator","display_name":"Estimator","score":0.4803999960422516},{"id":"https://openalex.org/keywords/image","display_name":"Image (mathematics)","score":0.44749999046325684},{"id":"https://openalex.org/keywords/channel","display_name":"Channel (broadcasting)","score":0.3637999892234802},{"id":"https://openalex.org/keywords/goal-orientation","display_name":"Goal orientation","score":0.335999995470047},{"id":"https://openalex.org/keywords/navigation-system","display_name":"Navigation system","score":0.3246999979019165},{"id":"https://openalex.org/keywords/embodied-cognition","display_name":"Embodied cognition","score":0.3192000091075897}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6931999921798706},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6273000240325928},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.619700014591217},{"id":"https://openalex.org/C52102323","wikidata":"https://www.wikidata.org/wiki/Q1671968","display_name":"Pose","level":2,"score":0.6061999797821045},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.5795999765396118},{"id":"https://openalex.org/C185429906","wikidata":"https://www.wikidata.org/wiki/Q1130160","display_name":"Estimator","level":2,"score":0.4803999960422516},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.44749999046325684},{"id":"https://openalex.org/C107457646","wikidata":"https://www.wikidata.org/wiki/Q207434","display_name":"Human\u2013computer interaction","level":1,"score":0.430400013923645},{"id":"https://openalex.org/C127162648","wikidata":"https://www.wikidata.org/wiki/Q16858953","display_name":"Channel (broadcasting)","level":2,"score":0.3637999892234802},{"id":"https://openalex.org/C84653758","wikidata":"https://www.wikidata.org/wiki/Q5575175","display_name":"Goal orientation","level":2,"score":0.335999995470047},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.32670000195503235},{"id":"https://openalex.org/C2777891301","wikidata":"https://www.wikidata.org/wiki/Q3475123","display_name":"Navigation system","level":2,"score":0.3246999979019165},{"id":"https://openalex.org/C100609095","wikidata":"https://www.wikidata.org/wiki/Q1335050","display_name":"Embodied cognition","level":2,"score":0.3192000091075897},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.3147999942302704},{"id":"https://openalex.org/C96250715","wikidata":"https://www.wikidata.org/wiki/Q965330","display_name":"Estimation","level":2,"score":0.31189998984336853},{"id":"https://openalex.org/C2778572836","wikidata":"https://www.wikidata.org/wiki/Q380933","display_name":"Space (punctuation)","level":2,"score":0.30300000309944153},{"id":"https://openalex.org/C2164484","wikidata":"https://www.wikidata.org/wiki/Q5170150","display_name":"Core (optical fiber)","level":2,"score":0.289000004529953},{"id":"https://openalex.org/C2777211547","wikidata":"https://www.wikidata.org/wiki/Q17141490","display_name":"Training (meteorology)","level":2,"score":0.28519999980926514},{"id":"https://openalex.org/C175154964","wikidata":"https://www.wikidata.org/wiki/Q380077","display_name":"Task analysis","level":3,"score":0.2773999869823456},{"id":"https://openalex.org/C90509273","wikidata":"https://www.wikidata.org/wiki/Q11012","display_name":"Robot","level":2,"score":0.2581000030040741},{"id":"https://openalex.org/C36464697","wikidata":"https://www.wikidata.org/wiki/Q451553","display_name":"Visualization","level":2,"score":0.2540999948978424}],"mesh":[],"locations_count":4,"locations":[{"id":"doi:10.1109/3dv69130.2026.00067","is_oa":false,"landing_page_url":"https://doi.org/10.1109/3dv69130.2026.00067","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2026 International Conference on 3D Vision (3DV)","raw_type":"proceedings-article"},{"id":"pmh:oai:arXiv.org:2507.01667","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2507.01667","pdf_url":"https://arxiv.org/pdf/2507.01667","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},{"id":"pmh:doi:10.48550/arxiv.2507.01667","is_oa":true,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S4406922384","display_name":"Open MIND","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"publisher-specific-oa","license_id":"https://openalex.org/licenses/publisher-specific-oa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Article"},{"id":"doi:10.48550/arxiv.2507.01667","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2507.01667","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:2507.01667","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2507.01667","pdf_url":"https://arxiv.org/pdf/2507.01667","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Image":[0],"goal":[1,38],"navigation":[2,9,104,138,188],"requires":[3],"two":[4],"different":[5],"skills:":[6],"firstly,":[7],"core":[8],"skills,":[10],"including":[11],"the":[12,37,113,131,143],"detection":[13],"of":[14,50,73,97,115,133,145],"free":[15],"space":[16],"and":[17,19,27,94,125,127,190],"obstacles,":[18],"taking":[20],"decisions":[21],"based":[22],"on":[23,45,54],"an":[24,197],"internal":[25],"representation;":[26],"secondly,":[28],"computing":[29],"directional":[30],"information":[31],"by":[32,82,155],"comparing":[33],"visual":[34],"observations":[35],"to":[36,151,159,173,178],"image.":[39],"Current":[40],"state-of-the-art":[41],"methods":[42,147],"either":[43],"rely":[44],"dedicated":[46],"image-matching,":[47],"or":[48],"pre-training":[49],"computer":[51],"vision":[52],"modules":[53],"relative":[55,98,134,193],"pose":[56,99,135,194],"estimation.":[57],"In":[58,106],"this":[59,64,107],"paper,":[60],"we":[61,111,164],"study":[62,110],"whether":[63],"task":[65],"can":[66,170],"be":[67,171],"efficiently":[68],"solved":[69],"with":[70,76],"end-to-end":[71],"training":[72,96],"full":[74],"agents":[75],"RL,":[77],"as":[78],"has":[79],"been":[80],"claimed":[81],"recent":[83,146],"work.":[84],"A":[85],"positive":[86],"answer":[87],"would":[88],"have":[89],"impact":[90],"beyond":[91],"Embodied":[92],"AI":[93],"allow":[95],"estimation":[100,195],"from":[101,137],"reward":[102],"for":[103,185],"alone.":[105],"large":[108],"experimental":[109],"investigate":[112],"effect":[114],"architectural":[116],"choices":[117],"like":[118],"late":[119],"fusion,":[120],"channel":[121],"stacking,":[122],"space-to-depth":[123],"projections":[124],"cross-attention,":[126],"their":[128],"role":[129],"in":[130,161],"emergence":[132],"estimators":[136],"training.":[139],"We":[140,181],"show":[141,166],"that":[142,167],"success":[144],"is":[148],"influenced":[149],"up":[150,177],"a":[152],"certain":[153],"extent":[154],"simulator":[156],"settings,":[157],"leading":[158],"shortcuts":[160],"simulation.":[162],"However,":[163],"also":[165,182],"these":[168],"capabilities":[169],"transferred":[172],"more":[174],"realistic":[175],"setting,":[176],"some":[179],"extent.":[180],"find":[183],"evidence":[184],"correlations":[186],"between":[187],"performance":[189],"probed":[191],"(emerging)":[192],"performance,":[196],"important":[198],"sub":[199],"skill.":[200]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2025-10-10T00:00:00"}
