{"id":"https://openalex.org/W7162787002","doi":"https://doi.org/10.1145/3745756.3809218","title":"ZA-SLAM: Leveraging Vision-Language Model for Zero-Shot Acoustic SLAM","display_name":"ZA-SLAM: Leveraging Vision-Language Model for Zero-Shot Acoustic SLAM","publication_year":2026,"publication_date":"2026-05-29","ids":{"openalex":"https://openalex.org/W7162787002","doi":"https://doi.org/10.1145/3745756.3809218"},"language":null,"primary_location":{"id":"doi:10.1145/3745756.3809218","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3745756.3809218","pdf_url":null,"source":null,"license":"cc-by-nc-nd","license_id":"https://openalex.org/licenses/cc-by-nc-nd","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 24th Annual International Conference on Mobile Systems, Applications and Services","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://doi.org/10.1145/3745756.3809218","any_repository_has_fulltext":null},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5003422303","display_name":"Zhuochen Yu","orcid":"https://orcid.org/0000-0001-9962-1322"},"institutions":[{"id":"https://openalex.org/I152815399","display_name":"Singapore University of Technology and Design","ror":"https://ror.org/05j6fvn87","country_code":"SG","type":"education","lineage":["https://openalex.org/I152815399"]}],"countries":["SG"],"is_corresponding":false,"raw_author_name":"Zhuochen Yu","raw_affiliation_strings":["Singapore University of Technology and Design, Singapore, Singapore"],"raw_orcid":"https://orcid.org/0009-0008-4914-3949","affiliations":[{"raw_affiliation_string":"Singapore University of Technology and Design, Singapore, Singapore","institution_ids":["https://openalex.org/I152815399"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5063936654","display_name":"David K. Y. Yau","orcid":"https://orcid.org/0000-0001-9061-7423"},"institutions":[{"id":"https://openalex.org/I152815399","display_name":"Singapore University of Technology and Design","ror":"https://ror.org/05j6fvn87","country_code":"SG","type":"education","lineage":["https://openalex.org/I152815399"]}],"countries":["SG"],"is_corresponding":false,"raw_author_name":"David K. Y. Yau","raw_affiliation_strings":["Singapore University of Technology &amp; Design, Singapore, Singapore"],"raw_orcid":"https://orcid.org/0000-0001-9061-7423","affiliations":[{"raw_affiliation_string":"Singapore University of Technology &amp; Design, Singapore, Singapore","institution_ids":["https://openalex.org/I152815399"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5044206299","display_name":"Yijie Shen","orcid":"https://orcid.org/0000-0003-1202-2525"},"institutions":[{"id":"https://openalex.org/I30809798","display_name":"ShanghaiTech University","ror":"https://ror.org/030bhh786","country_code":"CN","type":"education","lineage":["https://openalex.org/I30809798"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yijie Shen","raw_affiliation_strings":["ShanghaiTech University, Shanghai, China"],"raw_orcid":"https://orcid.org/0009-0004-3896-411X","affiliations":[{"raw_affiliation_string":"ShanghaiTech University, Shanghai, China","institution_ids":["https://openalex.org/I30809798"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5137325032","display_name":"Xiaoran Fan","orcid":"https://orcid.org/0000-0002-6368-9250"},"institutions":[{"id":"https://openalex.org/I1291425158","display_name":"Google (United States)","ror":"https://ror.org/00njsd438","country_code":"US","type":"company","lineage":["https://openalex.org/I1291425158","https://openalex.org/I4210128969"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Xiaoran Fan","raw_affiliation_strings":["Google, Mountain View, USA"],"raw_orcid":"https://orcid.org/0000-0002-6368-9250","affiliations":[{"raw_affiliation_string":"Google, Mountain View, USA","institution_ids":["https://openalex.org/I1291425158"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5137330666","display_name":"Tao Chen","orcid":"https://orcid.org/0009-0004-7806-4503"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Tao Chen","raw_affiliation_strings":["Independent Researcher, Frisco, USA"],"raw_orcid":"https://orcid.org/0000-0003-4565-5548","affiliations":[{"raw_affiliation_string":"Independent Researcher, Frisco, USA","institution_ids":[]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5017671322","display_name":"Qun Song","orcid":"https://orcid.org/0000-0002-3611-9404"},"institutions":[{"id":"https://openalex.org/I168719708","display_name":"City University of Hong Kong","ror":"https://ror.org/03q8dnn23","country_code":"HK","type":"education","lineage":["https://openalex.org/I168719708"]}],"countries":["HK"],"is_corresponding":false,"raw_author_name":"Qun Song","raw_affiliation_strings":["City University of Hong Kong, Hong Kong, Hong Kong"],"raw_orcid":"https://orcid.org/0000-0002-3611-9404","affiliations":[{"raw_affiliation_string":"City University of Hong Kong, Hong Kong, Hong Kong","institution_ids":["https://openalex.org/I168719708"]}]}],"institutions":[],"countries_distinct_count":4,"institutions_distinct_count":6,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":29.9726,"has_fulltext":false,"cited_by_count":1,"citation_normalized_percentile":{"value":0.99485709,"is_in_top_1_percent":true,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":95,"max":98},"biblio":{"volume":null,"issue":null,"first_page":"492","last_page":"505"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10191","display_name":"Robotics and Sensor-Based Localization","score":0.7358999848365784,"subfield":{"id":"https://openalex.org/subfields/2202","display_name":"Aerospace Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10191","display_name":"Robotics and Sensor-Based Localization","score":0.7358999848365784,"subfield":{"id":"https://openalex.org/subfields/2202","display_name":"Aerospace Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10326","display_name":"Indoor and Outdoor Localization Technologies","score":0.16609999537467957,"subfield":{"id":"https://openalex.org/subfields/2208","display_name":"Electrical and Electronic Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.01850000023841858,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/simultaneous-localization-and-mapping","display_name":"Simultaneous localization and mapping","score":0.59170001745224},{"id":"https://openalex.org/keywords/trajectory","display_name":"Trajectory","score":0.5311999917030334},{"id":"https://openalex.org/keywords/encoder","display_name":"Encoder","score":0.45910000801086426},{"id":"https://openalex.org/keywords/feature","display_name":"Feature (linguistics)","score":0.43709999322891235},{"id":"https://openalex.org/keywords/inertial-measurement-unit","display_name":"Inertial measurement unit","score":0.40059998631477356},{"id":"https://openalex.org/keywords/matching","display_name":"Matching (statistics)","score":0.38530001044273376},{"id":"https://openalex.org/keywords/generalization","display_name":"Generalization","score":0.35899999737739563},{"id":"https://openalex.org/keywords/feature-extraction","display_name":"Feature extraction","score":0.357699990272522}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7613000273704529},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6657999753952026},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.6144000291824341},{"id":"https://openalex.org/C86369673","wikidata":"https://www.wikidata.org/wiki/Q1203659","display_name":"Simultaneous localization and mapping","level":4,"score":0.59170001745224},{"id":"https://openalex.org/C13662910","wikidata":"https://www.wikidata.org/wiki/Q193139","display_name":"Trajectory","level":2,"score":0.5311999917030334},{"id":"https://openalex.org/C118505674","wikidata":"https://www.wikidata.org/wiki/Q42586063","display_name":"Encoder","level":2,"score":0.45910000801086426},{"id":"https://openalex.org/C2776401178","wikidata":"https://www.wikidata.org/wiki/Q12050496","display_name":"Feature (linguistics)","level":2,"score":0.43709999322891235},{"id":"https://openalex.org/C79061980","wikidata":"https://www.wikidata.org/wiki/Q941680","display_name":"Inertial measurement unit","level":2,"score":0.40059998631477356},{"id":"https://openalex.org/C165064840","wikidata":"https://www.wikidata.org/wiki/Q1321061","display_name":"Matching (statistics)","level":2,"score":0.38530001044273376},{"id":"https://openalex.org/C177148314","wikidata":"https://www.wikidata.org/wiki/Q170084","display_name":"Generalization","level":2,"score":0.35899999737739563},{"id":"https://openalex.org/C52622490","wikidata":"https://www.wikidata.org/wiki/Q1026626","display_name":"Feature extraction","level":2,"score":0.357699990272522},{"id":"https://openalex.org/C136643341","wikidata":"https://www.wikidata.org/wiki/Q1361526","display_name":"Reachability","level":2,"score":0.3303000032901764},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.31949999928474426},{"id":"https://openalex.org/C36464697","wikidata":"https://www.wikidata.org/wiki/Q451553","display_name":"Visualization","level":2,"score":0.2678999900817871},{"id":"https://openalex.org/C126780896","wikidata":"https://www.wikidata.org/wiki/Q899871","display_name":"Distortion (music)","level":4,"score":0.26600000262260437},{"id":"https://openalex.org/C90509273","wikidata":"https://www.wikidata.org/wiki/Q11012","display_name":"Robot","level":2,"score":0.2635999917984009},{"id":"https://openalex.org/C148483581","wikidata":"https://www.wikidata.org/wiki/Q446488","display_name":"Feature selection","level":2,"score":0.25369998812675476},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.2529999911785126},{"id":"https://openalex.org/C75553542","wikidata":"https://www.wikidata.org/wiki/Q178161","display_name":"A priori and a posteriori","level":2,"score":0.25290000438690186}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3745756.3809218","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3745756.3809218","pdf_url":null,"source":null,"license":"cc-by-nc-nd","license_id":"https://openalex.org/licenses/cc-by-nc-nd","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 24th Annual International Conference on Mobile Systems, Applications and Services","raw_type":"proceedings-article"}],"best_oa_location":{"id":"doi:10.1145/3745756.3809218","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3745756.3809218","pdf_url":null,"source":null,"license":"cc-by-nc-nd","license_id":"https://openalex.org/licenses/cc-by-nc-nd","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 24th Annual International Conference on Mobile Systems, Applications and Services","raw_type":"proceedings-article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":33,"referenced_works":["https://openalex.org/W151377110","https://openalex.org/W1656165940","https://openalex.org/W1977687024","https://openalex.org/W2032618685","https://openalex.org/W2036946260","https://openalex.org/W2051376734","https://openalex.org/W2080842580","https://openalex.org/W2088136988","https://openalex.org/W2123430619","https://openalex.org/W2131822572","https://openalex.org/W2138621090","https://openalex.org/W2170102584","https://openalex.org/W2172093107","https://openalex.org/W2194775991","https://openalex.org/W2461937780","https://openalex.org/W2525989248","https://openalex.org/W2624849939","https://openalex.org/W2775147633","https://openalex.org/W2799914042","https://openalex.org/W2807015669","https://openalex.org/W2824103177","https://openalex.org/W2890216513","https://openalex.org/W3023214412","https://openalex.org/W3047014196","https://openalex.org/W3122878979","https://openalex.org/W3201513527","https://openalex.org/W4312912313","https://openalex.org/W4317927950","https://openalex.org/W4383108296","https://openalex.org/W4390873312","https://openalex.org/W4393032581","https://openalex.org/W4394625608","https://openalex.org/W4404034597"],"related_works":[],"abstract_inverted_index":{"Existing":[0],"acoustic":[1,28,52,85,167],"indoor":[2],"location":[3],"sensing":[4],"systems":[5],"are":[6],"limited":[7],"by":[8,116],"the":[9,56,84,129,141],"need":[10],"for":[11,100],"extensive":[12],"data":[13],"collection":[14],"and":[15,31,123,169,178,192],"model":[16,43],"retraining":[17],"in":[18,39,67,136],"unseen":[19,40,201],"environments.":[20,202],"This":[21],"paper":[22],"introduces":[23],"ZA-SLAM,":[24],"a":[25,105,185,193],"novel":[26],"zero-shot":[27,70,176],"Simultaneous":[29],"Localization":[30],"Mapping":[32],"(SLAM)":[33],"system":[34,160,183],"that":[35,54,109,146,158],"can":[36],"be":[37],"deployed":[38],"environments":[41],"without":[42],"retraining.":[44],"Our":[45,182],"core":[46],"idea":[47],"is":[48],"to":[49,82,87,165,174],"train":[50],"an":[51],"encoder":[53,86],"inherits":[55],"generalization":[57],"capabilities":[58],"of":[59,131,189,196],"pre-trained":[60],"Vision-Language":[61],"Models":[62],"(VLMs),":[63],"which":[64],"show":[65],"superiority":[66],"tasks":[68],"like":[69,118],"visual":[71,92],"SLAM.":[72],"To":[73,96],"achieve":[74],"this":[75],"goal,":[76],"we":[77,103,127],"perform":[78],"Acoustic-Visual":[79],"Feature":[80],"Alignment":[81],"enable":[83],"generate":[88],"features":[89,93],"aligned":[90],"with":[91,140],"from":[94],"VLMs.":[95],"select":[97],"high-quality":[98],"images":[99,114],"effective":[101],"alignment,":[102],"design":[104],"Semantic-Guided":[106],"Image":[107],"Selection":[108],"filters":[110],"out":[111],"low-quality":[112],"collected":[113],"caused":[115],"factors":[117],"abrupt":[119],"view":[120],"changes,":[121],"occlusions,":[122],"uninformative":[124],"views.":[125],"Furthermore,":[126],"address":[128],"challenge":[130],"false":[132],"positive":[133],"loop":[134,148],"closures":[135,149],"structurally":[137],"similar":[138],"locations":[139],"Learning-Based":[142],"Trajectory":[143],"Reachability":[144],"Matching":[145],"validates":[147],"leveraging":[150],"IMU":[151],"trajectory":[152],"features.":[153],"Extensive":[154],"real-world":[155],"experiments":[156],"demonstrate":[157],"our":[159],"achieves":[161,184],"comparable":[162],"SLAM":[163,180],"performance":[164,172],"retraining-based":[166],"SLAM,":[168],"much":[170],"improved":[171],"compared":[173],"existing":[175],"Wi-Fi":[177],"geomagnetic":[179],"systems.":[181],"mean":[186],"mapping":[187],"error":[188,195],"0.56":[190],"m":[191,198],"localization":[194],"0.78":[197],"across":[199],"multiple":[200]},"counts_by_year":[{"year":2026,"cited_by_count":1}],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2026-05-30T00:00:00"}
