{"id":"https://openalex.org/W7140234916","doi":"https://doi.org/10.48550/arxiv.2603.20530","title":"Memory Over Maps: 3D Object Localization Without Reconstruction","display_name":"Memory Over Maps: 3D Object Localization Without Reconstruction","publication_year":2026,"publication_date":"2026-03-20","ids":{"openalex":"https://openalex.org/W7140234916","doi":"https://doi.org/10.48550/arxiv.2603.20530"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2603.20530","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.20530","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2603.20530","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":null,"display_name":"Zhou, Rui","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Zhou, Rui","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":null,"display_name":"Yap, Xander","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yap, Xander","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":null,"display_name":"Cao, Jianwen","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Cao, Jianwen","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":null,"display_name":"Lau, Allison","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Lau, Allison","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":null,"display_name":"Sun, Boyang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Sun, Boyang","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":null,"display_name":"Pollefeys, Marc","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Pollefeys, Marc","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":6,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.6144000291824341,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.6144000291824341,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10191","display_name":"Robotics and Sensor-Based Localization","score":0.1988999992609024,"subfield":{"id":"https://openalex.org/subfields/2202","display_name":"Aerospace Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.041600000113248825,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/pipeline","display_name":"Pipeline (software)","score":0.6632000207901001},{"id":"https://openalex.org/keywords/object","display_name":"Object (grammar)","score":0.6173999905586243},{"id":"https://openalex.org/keywords/preprocessor","display_name":"Preprocessor","score":0.5791000127792358},{"id":"https://openalex.org/keywords/search-engine-indexing","display_name":"Search engine indexing","score":0.5616999864578247},{"id":"https://openalex.org/keywords/scalability","display_name":"Scalability","score":0.512499988079071},{"id":"https://openalex.org/keywords/representation","display_name":"Representation (politics)","score":0.5022000074386597},{"id":"https://openalex.org/keywords/point","display_name":"Point (geometry)","score":0.426800012588501},{"id":"https://openalex.org/keywords/visual-hull","display_name":"Visual hull","score":0.424699991941452},{"id":"https://openalex.org/keywords/3d-reconstruction","display_name":"3D reconstruction","score":0.41519999504089355}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8079000115394592},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.7275000214576721},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.7185999751091003},{"id":"https://openalex.org/C43521106","wikidata":"https://www.wikidata.org/wiki/Q2165493","display_name":"Pipeline (software)","level":2,"score":0.6632000207901001},{"id":"https://openalex.org/C2781238097","wikidata":"https://www.wikidata.org/wiki/Q175026","display_name":"Object (grammar)","level":2,"score":0.6173999905586243},{"id":"https://openalex.org/C34736171","wikidata":"https://www.wikidata.org/wiki/Q918333","display_name":"Preprocessor","level":2,"score":0.5791000127792358},{"id":"https://openalex.org/C75165309","wikidata":"https://www.wikidata.org/wiki/Q2258979","display_name":"Search engine indexing","level":2,"score":0.5616999864578247},{"id":"https://openalex.org/C48044578","wikidata":"https://www.wikidata.org/wiki/Q727490","display_name":"Scalability","level":2,"score":0.512499988079071},{"id":"https://openalex.org/C2776359362","wikidata":"https://www.wikidata.org/wiki/Q2145286","display_name":"Representation (politics)","level":3,"score":0.5022000074386597},{"id":"https://openalex.org/C28719098","wikidata":"https://www.wikidata.org/wiki/Q44946","display_name":"Point (geometry)","level":2,"score":0.426800012588501},{"id":"https://openalex.org/C2776863239","wikidata":"https://www.wikidata.org/wiki/Q7936601","display_name":"Visual hull","level":3,"score":0.424699991941452},{"id":"https://openalex.org/C109950114","wikidata":"https://www.wikidata.org/wiki/Q4464732","display_name":"3D reconstruction","level":2,"score":0.41519999504089355},{"id":"https://openalex.org/C86369673","wikidata":"https://www.wikidata.org/wiki/Q1203659","display_name":"Simultaneous localization and mapping","level":4,"score":0.3926999866962433},{"id":"https://openalex.org/C2776151529","wikidata":"https://www.wikidata.org/wiki/Q3045304","display_name":"Object detection","level":3,"score":0.3901999890804291},{"id":"https://openalex.org/C90509273","wikidata":"https://www.wikidata.org/wiki/Q11012","display_name":"Robot","level":2,"score":0.3885999917984009},{"id":"https://openalex.org/C64876066","wikidata":"https://www.wikidata.org/wiki/Q5141226","display_name":"Cognitive neuroscience of visual object recognition","level":3,"score":0.3765000104904175},{"id":"https://openalex.org/C36464697","wikidata":"https://www.wikidata.org/wiki/Q451553","display_name":"Visualization","level":2,"score":0.3596999943256378},{"id":"https://openalex.org/C89600930","wikidata":"https://www.wikidata.org/wiki/Q1423946","display_name":"Segmentation","level":2,"score":0.35850000381469727},{"id":"https://openalex.org/C54170458","wikidata":"https://www.wikidata.org/wiki/Q663554","display_name":"Voxel","level":2,"score":0.3571999967098236},{"id":"https://openalex.org/C184337299","wikidata":"https://www.wikidata.org/wiki/Q1437428","display_name":"Semantics (computer science)","level":2,"score":0.34779998660087585},{"id":"https://openalex.org/C2775955345","wikidata":"https://www.wikidata.org/wiki/Q7449071","display_name":"Semantic mapping","level":2,"score":0.32589998841285706},{"id":"https://openalex.org/C108882727","wikidata":"https://www.wikidata.org/wiki/Q2991685","display_name":"Solid modeling","level":2,"score":0.2831000089645386},{"id":"https://openalex.org/C197654239","wikidata":"https://www.wikidata.org/wiki/Q7430757","display_name":"Scene statistics","level":3,"score":0.27559998631477356},{"id":"https://openalex.org/C12713177","wikidata":"https://www.wikidata.org/wiki/Q1900281","display_name":"Perspective (graphical)","level":2,"score":0.2653000056743622},{"id":"https://openalex.org/C2776937971","wikidata":"https://www.wikidata.org/wiki/Q4384217","display_name":"Heading (navigation)","level":2,"score":0.263700008392334},{"id":"https://openalex.org/C4069607","wikidata":"https://www.wikidata.org/wiki/Q868732","display_name":"Aliasing","level":3,"score":0.26190000772476196},{"id":"https://openalex.org/C2777508537","wikidata":"https://www.wikidata.org/wiki/Q7936620","display_name":"Visual reasoning","level":2,"score":0.2529999911785126},{"id":"https://openalex.org/C141379421","wikidata":"https://www.wikidata.org/wiki/Q6094427","display_name":"Iterative reconstruction","level":2,"score":0.2517000138759613}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2603.20530","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.20530","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2603.20530","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.20530","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Target":[0],"localization":[1,85],"is":[2,69,155],"a":[3,66,70,88,98,121,126],"prerequisite":[4],"for":[5,76,207],"embodied":[6],"tasks":[7],"such":[8,26],"as":[9,27,97],"navigation":[10,178],"and":[11,45,86,124,138],"manipulation.":[12],"Conventional":[13],"approaches":[14],"rely":[15],"on":[16,62,175],"constructing":[17,102],"explicit":[18],"3D":[19,72,105,129,205],"scene":[20,33,73,152,199],"representations":[21],"to":[22,142,162],"enable":[23],"target":[24,134],"localization,":[25],"point":[28],"clouds,":[29],"voxel":[30],"grids,":[31],"or":[32],"graphs.":[34],"While":[35],"effective,":[36],"these":[37],"pipelines":[38],"incur":[39],"substantial":[40],"mapping":[41],"time,":[42,112],"storage":[43],"overhead,":[44],"scalability":[46],"limitations.":[47],"Recent":[48],"advances":[49],"in":[50],"vision-language":[51,122],"models":[52],"suggest":[53],"that":[54,91,154,194],"rich":[55],"semantic":[56],"reasoning":[57,196],"can":[58,201],"be":[59],"performed":[60],"directly":[61],"2D":[63],"observations,":[64],"raising":[65],"fundamental":[67],"question:":[68],"complete":[71],"reconstruction":[74,206],"necessary":[75],"object":[77,84],"localization?":[78],"In":[79],"this":[80,145],"work,":[81],"we":[82],"revisit":[83],"propose":[87],"map-free":[89],"pipeline":[90],"stores":[92],"only":[93],"posed":[94],"RGB-D":[95],"keyframes":[96],"lightweight":[99],"visual":[100],"memory--without":[101],"any":[103],"global":[104],"representation":[106],"of":[107,131,159],"the":[108,132,172],"scene.":[109],"At":[110],"query":[111],"our":[113,185],"method":[114],"retrieves":[115],"candidate":[116],"views,":[117],"re-ranks":[118],"them":[119],"with":[120],"model,":[123],"constructs":[125],"sparse,":[127],"on-demand":[128],"estimate":[130],"queried":[133],"through":[135],"depth":[136],"backprojection":[137],"multi-view":[139],"fusion.":[140],"Compared":[141],"reconstruction-based":[143],"pipelines,":[144],"design":[146],"drastically":[147],"reduces":[148],"preprocessing":[149],"cost,":[150],"enabling":[151],"indexing":[153],"over":[156,197],"two":[157],"orders":[158],"magnitude":[160],"faster":[161],"build":[163],"while":[164],"using":[165],"substantially":[166],"less":[167],"storage.":[168],"We":[169],"further":[170],"validate":[171],"localized":[173],"targets":[174],"downstream":[176],"object-goal":[177],"tasks.":[179],"Despite":[180],"requiring":[181],"no":[182],"task-specific":[183],"training,":[184],"approach":[186],"achieves":[187],"strong":[188],"performance":[189],"across":[190],"multiple":[191],"benchmarks,":[192],"demonstrating":[193],"direct":[195],"image-based":[198],"memory":[200],"effectively":[202],"replace":[203],"dense":[204],"object-centric":[208],"robot":[209],"navigation.":[210],"Project":[211],"page:":[212],"https://ruizhou-cn.github.io/memory-over-maps/":[213]},"counts_by_year":[],"updated_date":"2026-04-25T08:17:42.794288","created_date":"2026-03-25T00:00:00"}
