{"id":"https://openalex.org/W7133338682","doi":"https://doi.org/10.48550/arxiv.2603.01613","title":"Uncertainty-Aware Hierarchical Re-Localization in OpenStreetMap via Semantic Alignment","display_name":"Uncertainty-Aware Hierarchical Re-Localization in OpenStreetMap via Semantic Alignment","publication_year":2026,"publication_date":"2026-03-02","ids":{"openalex":"https://openalex.org/W7133338682","doi":"https://doi.org/10.48550/arxiv.2603.01613"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2603.01613","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.01613","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2603.01613","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5125470431","display_name":"Yuchen Zou","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zou, Yuchen","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5127914422","display_name":"Xiao Hu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Hu, Xiao","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5020071899","display_name":"Dexing Zhong","orcid":"https://orcid.org/0000-0002-6806-6300"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Fang, Lihuang","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5127944344","display_name":"Yuqing Tang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Tang, Yuqing","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":4,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10191","display_name":"Robotics and Sensor-Based Localization","score":0.398499995470047,"subfield":{"id":"https://openalex.org/subfields/2202","display_name":"Aerospace Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10191","display_name":"Robotics and Sensor-Based Localization","score":0.398499995470047,"subfield":{"id":"https://openalex.org/subfields/2202","display_name":"Aerospace Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10627","display_name":"Advanced Image and Video Retrieval Techniques","score":0.2549999952316284,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.13030000030994415,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/scalability","display_name":"Scalability","score":0.65420001745224},{"id":"https://openalex.org/keywords/monocular","display_name":"Monocular","score":0.6251000165939331},{"id":"https://openalex.org/keywords/semantic-mapping","display_name":"Semantic mapping","score":0.5942999720573425},{"id":"https://openalex.org/keywords/semantics","display_name":"Semantics (computer science)","score":0.5026000142097473},{"id":"https://openalex.org/keywords/precision-and-recall","display_name":"Precision and recall","score":0.49570000171661377},{"id":"https://openalex.org/keywords/recall","display_name":"Recall","score":0.447299987077713},{"id":"https://openalex.org/keywords/orientation","display_name":"Orientation (vector space)","score":0.44339999556541443},{"id":"https://openalex.org/keywords/face","display_name":"Face (sociological concept)","score":0.44119998812675476}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8064000010490417},{"id":"https://openalex.org/C48044578","wikidata":"https://www.wikidata.org/wiki/Q727490","display_name":"Scalability","level":2,"score":0.65420001745224},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6335999965667725},{"id":"https://openalex.org/C65909025","wikidata":"https://www.wikidata.org/wiki/Q1945033","display_name":"Monocular","level":2,"score":0.6251000165939331},{"id":"https://openalex.org/C2775955345","wikidata":"https://www.wikidata.org/wiki/Q7449071","display_name":"Semantic mapping","level":2,"score":0.5942999720573425},{"id":"https://openalex.org/C184337299","wikidata":"https://www.wikidata.org/wiki/Q1437428","display_name":"Semantics (computer science)","level":2,"score":0.5026000142097473},{"id":"https://openalex.org/C81669768","wikidata":"https://www.wikidata.org/wiki/Q2359161","display_name":"Precision and recall","level":2,"score":0.49570000171661377},{"id":"https://openalex.org/C100660578","wikidata":"https://www.wikidata.org/wiki/Q18733","display_name":"Recall","level":2,"score":0.447299987077713},{"id":"https://openalex.org/C16345878","wikidata":"https://www.wikidata.org/wiki/Q107472979","display_name":"Orientation (vector space)","level":2,"score":0.44339999556541443},{"id":"https://openalex.org/C2779304628","wikidata":"https://www.wikidata.org/wiki/Q3503480","display_name":"Face (sociological concept)","level":2,"score":0.44119998812675476},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.4381999969482422},{"id":"https://openalex.org/C2778180026","wikidata":"https://www.wikidata.org/wiki/Q18378163","display_name":"Semantic heterogeneity","level":4,"score":0.38989999890327454},{"id":"https://openalex.org/C36464697","wikidata":"https://www.wikidata.org/wiki/Q451553","display_name":"Visualization","level":2,"score":0.3538999855518341},{"id":"https://openalex.org/C158829959","wikidata":"https://www.wikidata.org/wiki/Q1640606","display_name":"Monocular vision","level":2,"score":0.29510000348091125},{"id":"https://openalex.org/C90312973","wikidata":"https://www.wikidata.org/wiki/Q7449052","display_name":"Semantic data model","level":2,"score":0.29019999504089355},{"id":"https://openalex.org/C511149849","wikidata":"https://www.wikidata.org/wiki/Q7449051","display_name":"Semantic computing","level":3,"score":0.2833000123500824},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.2782000005245209},{"id":"https://openalex.org/C2779439875","wikidata":"https://www.wikidata.org/wiki/Q1078276","display_name":"Natural language understanding","level":3,"score":0.27149999141693115},{"id":"https://openalex.org/C110903229","wikidata":"https://www.wikidata.org/wiki/Q7449064","display_name":"Semantic integration","level":4,"score":0.26350000500679016},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.2535000145435333},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.2524999976158142},{"id":"https://openalex.org/C166423231","wikidata":"https://www.wikidata.org/wiki/Q1891170","display_name":"Semantic search","level":3,"score":0.250900000333786}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2603.01613","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.01613","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2603.01613","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.01613","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[{"score":0.4737285375595093,"display_name":"Peace, Justice and strong institutions","id":"https://metadata.un.org/sdg/16"}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Monocular":[0],"re-localization":[1],"enables":[2],"robots":[3],"to":[4,52,93],"estimate":[5],"camera":[6],"poses":[7],"from":[8],"visual":[9],"observations.":[10],"However,":[11],"many":[12],"existing":[13],"methods":[14],"rely":[15],"on":[16,132],"dense":[17,106],"maps":[18],"or":[19],"large":[20],"reference":[21],"image":[22],"databases,":[23],"which":[24],"face":[25],"scalability":[26],"limitations":[27],"and":[28,40,58,101,114,128],"privacy":[29],"risks.":[30],"OpenStreetMap":[31],"(OSM),":[32],"as":[33,60,62],"a":[34,133],"lightweight":[35],"privacy-preserving":[36],"map,":[37],"offers":[38],"semantic":[39,81,96],"geometric":[41],"information":[42],"with":[43,80],"global":[44,67,105],"scalability.":[45],"Nonetheless,":[46],"OSM":[47,102],"localization":[48,84,126],"remains":[49],"challenging":[50],"due":[51],"cross-modal":[53],"discrepancies":[54],"between":[55,98],"natural":[56],"images":[57],"OSM,":[59],"well":[61],"the":[63,95,136,145],"high":[64],"cost":[65],"of":[66,140,148],"map-based":[68],"localization.":[69],"In":[70],"this":[71],"paper,":[72],"we":[73],"propose":[74],"an":[75],"uncertainty-aware":[76],"hierarchical":[77],"search":[78],"framework":[79],"alignment":[82],"for":[83],"in":[85],"OSM.":[86],"First,":[87],"object-centric":[88],"DINO-ViT":[89],"tokens":[90],"are":[91],"exploited":[92],"reduce":[94],"gap":[97],"ground-view":[99],"observations":[100],"vectors.":[103],"Second,":[104],"matching":[107],"is":[108],"decomposed":[109],"into":[110],"coarse":[111],"FFT":[112],"correlation":[113],"uncertainty-controlled":[115],"local":[116],"refinement.":[117],"Extensive":[118],"experiments":[119],"demonstrate":[120],"that":[121],"our":[122,141],"method":[123,142],"significantly":[124],"improves":[125],"accuracy":[127],"speed.":[129],"When":[130],"trained":[131],"single":[134],"dataset,":[135],"3$^\\circ$":[137],"orientation":[138],"recall":[139,147],"even":[143],"outperforms":[144],"5$^\\circ$":[146],"state-of-the-art":[149],"methods.":[150]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2026-03-04T00:00:00"}
