{"id":"https://openalex.org/W4405305619","doi":"https://doi.org/10.1109/ipin62893.2024.10786105","title":"wav2pos: Sound Source Localization using Masked Autoencoders","display_name":"wav2pos: Sound Source Localization using Masked Autoencoders","publication_year":2024,"publication_date":"2024-10-14","ids":{"openalex":"https://openalex.org/W4405305619","doi":"https://doi.org/10.1109/ipin62893.2024.10786105"},"language":"en","primary_location":{"id":"doi:10.1109/ipin62893.2024.10786105","is_oa":false,"landing_page_url":"https://doi.org/10.1109/ipin62893.2024.10786105","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2024 14th International Conference on Indoor Positioning and Indoor Navigation (IPIN)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5025118280","display_name":"Axel Berg","orcid":"https://orcid.org/0000-0003-4401-989X"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Axel Berg","raw_affiliation_strings":["Lund University,Computer Vision and Machine Learning, Centre for Mathematical Sciences"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Lund University,Computer Vision and Machine Learning, Centre for Mathematical Sciences","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5094178937","display_name":"Jens Gulin","orcid":"https://orcid.org/0000-0002-3656-759X"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Jens Gulin","raw_affiliation_strings":["Lund University,Computer Vision and Machine Learning, Centre for Mathematical Sciences"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Lund University,Computer Vision and Machine Learning, Centre for Mathematical Sciences","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5110106480","display_name":"Mark O\u2019Connor","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Mark O\u2019Connor","raw_affiliation_strings":["Tenstorrent"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Tenstorrent","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5051610883","display_name":"Chuteng Zhou","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Chuteng Zhou","raw_affiliation_strings":["Lund University,Computer Vision and Machine Learning, Centre for Mathematical Sciences"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Lund University,Computer Vision and Machine Learning, Centre for Mathematical Sciences","institution_ids":[]}]},{"author_position":"middle","author":{"id":null,"display_name":"Karl \u00c5str\u00f6m","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Karl \u00c5str\u00f6m","raw_affiliation_strings":["Lund University,Computer Vision and Machine Learning, Centre for Mathematical Sciences"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Lund University,Computer Vision and Machine Learning, Centre for Mathematical Sciences","institution_ids":[]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5026331205","display_name":"Magnus Oskarsson","orcid":"https://orcid.org/0000-0002-1789-8094"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Magnus Oskarsson","raw_affiliation_strings":["Lund University,Computer Vision and Machine Learning, Centre for Mathematical Sciences"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Lund University,Computer Vision and Machine Learning, Centre for Mathematical Sciences","institution_ids":[]}]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":6,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":1.5286,"has_fulltext":false,"cited_by_count":5,"citation_normalized_percentile":{"value":0.842122,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":97,"max":98},"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"8"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9832000136375427,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9832000136375427,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.957099974155426,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9563999772071838,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6301192045211792},{"id":"https://openalex.org/keywords/sound","display_name":"Sound (geography)","score":0.6099233627319336},{"id":"https://openalex.org/keywords/acoustic-source-localization","display_name":"Acoustic source localization","score":0.5647214651107788},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.4115092158317566},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.35790103673934937},{"id":"https://openalex.org/keywords/acoustics","display_name":"Acoustics","score":0.3172318637371063},{"id":"https://openalex.org/keywords/physics","display_name":"Physics","score":0.07618275284767151}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6301192045211792},{"id":"https://openalex.org/C203718221","wikidata":"https://www.wikidata.org/wiki/Q491713","display_name":"Sound (geography)","level":2,"score":0.6099233627319336},{"id":"https://openalex.org/C93240960","wikidata":"https://www.wikidata.org/wiki/Q217270","display_name":"Acoustic source localization","level":3,"score":0.5647214651107788},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.4115092158317566},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.35790103673934937},{"id":"https://openalex.org/C24890656","wikidata":"https://www.wikidata.org/wiki/Q82811","display_name":"Acoustics","level":1,"score":0.3172318637371063},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.07618275284767151}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1109/ipin62893.2024.10786105","is_oa":false,"landing_page_url":"https://doi.org/10.1109/ipin62893.2024.10786105","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2024 14th International Conference on Indoor Positioning and Indoor Navigation (IPIN)","raw_type":"proceedings-article"},{"id":"pmh:oai:lup.lub.lu.se:4b3af846-795b-4ac7-956f-6aded73bc4e1","is_oa":false,"landing_page_url":"https://lup.lub.lu.se/record/4b3af846-795b-4ac7-956f-6aded73bc4e1","pdf_url":null,"source":{"id":"https://openalex.org/S4306400536","display_name":"Lund University Publications (Lund University)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I187531555","host_organization_name":"Lund University","host_organization_lineage":["https://openalex.org/I187531555"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"ISSN: 2162-7347","raw_type":"text"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":41,"referenced_works":["https://openalex.org/W1494198834","https://openalex.org/W1971222568","https://openalex.org/W2039715928","https://openalex.org/W2044541911","https://openalex.org/W2046317813","https://openalex.org/W2104866265","https://openalex.org/W2105562580","https://openalex.org/W2107376953","https://openalex.org/W2133353795","https://openalex.org/W2146270091","https://openalex.org/W2157229366","https://openalex.org/W2168298782","https://openalex.org/W2734809623","https://openalex.org/W2763188033","https://openalex.org/W2884822476","https://openalex.org/W2936950132","https://openalex.org/W3034751264","https://openalex.org/W3083121768","https://openalex.org/W3196976979","https://openalex.org/W3197097128","https://openalex.org/W4205570948","https://openalex.org/W4297841729","https://openalex.org/W4297841783","https://openalex.org/W4312096470","https://openalex.org/W4313156423","https://openalex.org/W4372346897","https://openalex.org/W4372347128","https://openalex.org/W4375869206","https://openalex.org/W4385245566","https://openalex.org/W4385822984","https://openalex.org/W4385823179","https://openalex.org/W4385823271","https://openalex.org/W4386289668","https://openalex.org/W4401414989","https://openalex.org/W6638667902","https://openalex.org/W6755207826","https://openalex.org/W6755977528","https://openalex.org/W6757817989","https://openalex.org/W6780226713","https://openalex.org/W6838540985","https://openalex.org/W6840200333"],"related_works":["https://openalex.org/W4318566916","https://openalex.org/W3054897025","https://openalex.org/W2905044649","https://openalex.org/W2087073758","https://openalex.org/W1541492535","https://openalex.org/W2802930369","https://openalex.org/W1989730649","https://openalex.org/W4389372176","https://openalex.org/W2043583277","https://openalex.org/W2567489220"],"abstract_inverted_index":{"We":[0,93],"present":[1],"a":[2,21,27,44,69,83],"novel":[3],"approach":[4,62],"to":[5,114],"the":[6,51,59,66],"3D":[7],"sound":[8,52],"source":[9],"localization":[10,49,121],"task":[11],"for":[12,47],"distributed":[13],"ad-hoc":[14],"microphone":[15,38,89],"arrays":[16],"by":[17,54],"formulating":[18],"it":[19],"as":[20],"set-to-set":[22],"regression":[23],"problem.":[24],"By":[25],"training":[26],"multi-modal":[28],"masked":[29,57],"autoencoder":[30],"model":[31,71],"that":[32,42,68],"operates":[33],"on":[34,97],"audio":[35,86],"recordings":[36,87,101],"and":[37,88,99,104,109,117],"coordinates,":[39],"we":[40],"show":[41],"such":[43],"formulation":[45],"allows":[46],"accurate":[48],"of":[50,79,85,102],"source,":[53],"reconstructing":[55],"coordinates":[56,90],"in":[58,65,106],"input.":[60],"Our":[61],"is":[63],"flexible":[64],"sense":[67],"single":[70],"can":[72],"be":[73],"used":[74],"with":[75],"an":[76],"arbitrary":[77],"number":[78],"microphones,":[80],"even":[81],"when":[82],"subset":[84],"are":[91],"missing.":[92],"test":[94],"our":[95],"method":[96],"simulated":[98],"real-world":[100],"music":[103],"speech":[105],"indoor":[107],"environments,":[108],"demonstrate":[110],"competitive":[111],"performance":[112],"compared":[113],"both":[115],"classical":[116],"other":[118],"learning":[119],"based":[120],"methods.":[122]},"counts_by_year":[{"year":2025,"cited_by_count":5}],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2025-10-10T00:00:00"}
