{"id":"https://openalex.org/W3167781088","doi":"https://doi.org/10.21437/interspeech.2021-124","title":"PILOT: Introducing Transformers for Probabilistic Sound Event Localization","display_name":"PILOT: Introducing Transformers for Probabilistic Sound Event Localization","publication_year":2021,"publication_date":"2021-08-27","ids":{"openalex":"https://openalex.org/W3167781088","doi":"https://doi.org/10.21437/interspeech.2021-124","mag":"3167781088"},"language":"en","primary_location":{"id":"doi:10.21437/interspeech.2021-124","is_oa":false,"landing_page_url":"https://doi.org/10.21437/interspeech.2021-124","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Interspeech 2021","raw_type":"proceedings-article"},"type":"preprint","indexed_in":["arxiv","crossref","datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/2106.03903","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5035233948","display_name":"Christopher Schymura","orcid":"https://orcid.org/0000-0002-6128-3556"},"institutions":[{"id":"https://openalex.org/I904495901","display_name":"Ruhr University Bochum","ror":"https://ror.org/04tsk2644","country_code":"DE","type":"education","lineage":["https://openalex.org/I904495901"]}],"countries":["DE"],"is_corresponding":true,"raw_author_name":"Christopher Schymura","raw_affiliation_strings":["Ruhr University Bochum, Bochum, Germany"],"affiliations":[{"raw_affiliation_string":"Ruhr University Bochum, Bochum, Germany","institution_ids":["https://openalex.org/I904495901"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5075110751","display_name":"Benedikt B\u00f6nninghoff","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Benedikt B\u00f6nninghoff","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5070624983","display_name":"Tsubasa Ochiai","orcid":"https://orcid.org/0000-0002-2519-2032"},"institutions":[{"id":"https://openalex.org/I2251713219","display_name":"NTT (Japan)","ror":"https://ror.org/00berct97","country_code":"JP","type":"company","lineage":["https://openalex.org/I2251713219"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Tsubasa Ochiai","raw_affiliation_strings":["NTT (Japan), Tokyo, Japan"],"affiliations":[{"raw_affiliation_string":"NTT (Japan), Tokyo, Japan","institution_ids":["https://openalex.org/I2251713219"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5023868166","display_name":"Marc Delcroix","orcid":"https://orcid.org/0000-0002-5175-7834"},"institutions":[{"id":"https://openalex.org/I2251713219","display_name":"NTT (Japan)","ror":"https://ror.org/00berct97","country_code":"JP","type":"company","lineage":["https://openalex.org/I2251713219"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Marc Delcroix","raw_affiliation_strings":["NTT (Japan), Tokyo, Japan"],"affiliations":[{"raw_affiliation_string":"NTT (Japan), Tokyo, Japan","institution_ids":["https://openalex.org/I2251713219"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5069398831","display_name":"Keisuke Kinoshita","orcid":"https://orcid.org/0009-0008-7987-8188"},"institutions":[{"id":"https://openalex.org/I206945453","display_name":"Paderborn University","ror":"https://ror.org/058kzsd48","country_code":"DE","type":"education","lineage":["https://openalex.org/I206945453"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Keisuke Kinoshita","raw_affiliation_strings":["Paderborn University, Paderborn, Germany"],"affiliations":[{"raw_affiliation_string":"Paderborn University, Paderborn, Germany","institution_ids":["https://openalex.org/I206945453"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5021240106","display_name":"Tomohiro Nakatani","orcid":"https://orcid.org/0000-0002-7487-7150"},"institutions":[{"id":"https://openalex.org/I2251713219","display_name":"NTT (Japan)","ror":"https://ror.org/00berct97","country_code":"JP","type":"company","lineage":["https://openalex.org/I2251713219"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Tomohiro Nakatani","raw_affiliation_strings":["NTT (Japan), Tokyo, Japan"],"affiliations":[{"raw_affiliation_string":"NTT (Japan), Tokyo, Japan","institution_ids":["https://openalex.org/I2251713219"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5009309584","display_name":"Shoko Araki","orcid":"https://orcid.org/0000-0003-4363-4305"},"institutions":[{"id":"https://openalex.org/I2251713219","display_name":"NTT (Japan)","ror":"https://ror.org/00berct97","country_code":"JP","type":"company","lineage":["https://openalex.org/I2251713219"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Shoko Araki","raw_affiliation_strings":["NTT (Japan), Tokyo, Japan"],"affiliations":[{"raw_affiliation_string":"NTT (Japan), Tokyo, Japan","institution_ids":["https://openalex.org/I2251713219"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5007017640","display_name":"Dorothea Kolossa","orcid":"https://orcid.org/0000-0003-0678-3053"},"institutions":[{"id":"https://openalex.org/I904495901","display_name":"Ruhr University Bochum","ror":"https://ror.org/04tsk2644","country_code":"DE","type":"education","lineage":["https://openalex.org/I904495901"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Dorothea Kolossa","raw_affiliation_strings":["Ruhr University Bochum, Bochum, Germany"],"affiliations":[{"raw_affiliation_string":"Ruhr University Bochum, Bochum, Germany","institution_ids":["https://openalex.org/I904495901"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":8,"corresponding_author_ids":["https://openalex.org/A5035233948"],"corresponding_institution_ids":["https://openalex.org/I904495901"],"apc_list":null,"apc_paid":null,"fwci":0.7695,"has_fulltext":true,"cited_by_count":5,"citation_normalized_percentile":{"value":0.70431515,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":96},"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9939000010490417,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7445498704910278},{"id":"https://openalex.org/keywords/probabilistic-logic","display_name":"Probabilistic logic","score":0.6913345456123352},{"id":"https://openalex.org/keywords/event","display_name":"Event (particle physics)","score":0.5947983860969543},{"id":"https://openalex.org/keywords/transformer","display_name":"Transformer","score":0.5308578610420227},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.49055665731430054},{"id":"https://openalex.org/keywords/microphone","display_name":"Microphone","score":0.462272971868515},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.4608989953994751},{"id":"https://openalex.org/keywords/recurrent-neural-network","display_name":"Recurrent neural network","score":0.4502614140510559},{"id":"https://openalex.org/keywords/artificial-neural-network","display_name":"Artificial neural network","score":0.43099892139434814},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.38537082076072693},{"id":"https://openalex.org/keywords/engineering","display_name":"Engineering","score":0.11170250177383423},{"id":"https://openalex.org/keywords/telecommunications","display_name":"Telecommunications","score":0.10137581825256348}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7445498704910278},{"id":"https://openalex.org/C49937458","wikidata":"https://www.wikidata.org/wiki/Q2599292","display_name":"Probabilistic logic","level":2,"score":0.6913345456123352},{"id":"https://openalex.org/C2779662365","wikidata":"https://www.wikidata.org/wiki/Q5416694","display_name":"Event (particle physics)","level":2,"score":0.5947983860969543},{"id":"https://openalex.org/C66322947","wikidata":"https://www.wikidata.org/wiki/Q11658","display_name":"Transformer","level":3,"score":0.5308578610420227},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.49055665731430054},{"id":"https://openalex.org/C2778263558","wikidata":"https://www.wikidata.org/wiki/Q46384","display_name":"Microphone","level":3,"score":0.462272971868515},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.4608989953994751},{"id":"https://openalex.org/C147168706","wikidata":"https://www.wikidata.org/wiki/Q1457734","display_name":"Recurrent neural network","level":3,"score":0.4502614140510559},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.43099892139434814},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.38537082076072693},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.11170250177383423},{"id":"https://openalex.org/C76155785","wikidata":"https://www.wikidata.org/wiki/Q418","display_name":"Telecommunications","level":1,"score":0.10137581825256348},{"id":"https://openalex.org/C165801399","wikidata":"https://www.wikidata.org/wiki/Q25428","display_name":"Voltage","level":2,"score":0.0},{"id":"https://openalex.org/C68115822","wikidata":"https://www.wikidata.org/wiki/Q1068172","display_name":"Sound pressure","level":2,"score":0.0},{"id":"https://openalex.org/C119599485","wikidata":"https://www.wikidata.org/wiki/Q43035","display_name":"Electrical engineering","level":1,"score":0.0},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0}],"mesh":[],"locations_count":4,"locations":[{"id":"doi:10.21437/interspeech.2021-124","is_oa":false,"landing_page_url":"https://doi.org/10.21437/interspeech.2021-124","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Interspeech 2021","raw_type":"proceedings-article"},{"id":"pmh:oai:arXiv.org:2106.03903","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2106.03903","pdf_url":"https://arxiv.org/pdf/2106.03903","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},{"id":"mag:3167781088","is_oa":true,"landing_page_url":"https://arxiv.org/pdf/2106.03903.pdf","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"arXiv (Cornell University)","raw_type":null},{"id":"doi:10.48550/arxiv.2106.03903","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2106.03903","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:2106.03903","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2106.03903","pdf_url":"https://arxiv.org/pdf/2106.03903","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W3167781088.pdf","grobid_xml":"https://content.openalex.org/works/W3167781088.grobid-xml"},"referenced_works_count":31,"referenced_works":["https://openalex.org/W1503398984","https://openalex.org/W1555217905","https://openalex.org/W1677182931","https://openalex.org/W1959608418","https://openalex.org/W1972978214","https://openalex.org/W2063708263","https://openalex.org/W2064675550","https://openalex.org/W2113638573","https://openalex.org/W2130989868","https://openalex.org/W2141398670","https://openalex.org/W2145303093","https://openalex.org/W2145652649","https://openalex.org/W2157331557","https://openalex.org/W2159346755","https://openalex.org/W2168745915","https://openalex.org/W2222512263","https://openalex.org/W2336416123","https://openalex.org/W2395463083","https://openalex.org/W2460742184","https://openalex.org/W2611943505","https://openalex.org/W2626778328","https://openalex.org/W2772736377","https://openalex.org/W2772956764","https://openalex.org/W2800100150","https://openalex.org/W2810934215","https://openalex.org/W2908510526","https://openalex.org/W2912752824","https://openalex.org/W2963371290","https://openalex.org/W2964342924","https://openalex.org/W2986631036","https://openalex.org/W3100871498"],"related_works":["https://openalex.org/W3198730349","https://openalex.org/W2891155820","https://openalex.org/W2253052170","https://openalex.org/W3184958222","https://openalex.org/W2963490617","https://openalex.org/W3011514609","https://openalex.org/W2074506198","https://openalex.org/W2968889683","https://openalex.org/W3010237784","https://openalex.org/W1763370240","https://openalex.org/W3200652663","https://openalex.org/W2094215757","https://openalex.org/W1996250091","https://openalex.org/W2625145450","https://openalex.org/W3027847923","https://openalex.org/W2743712389","https://openalex.org/W3047109833","https://openalex.org/W3042219597","https://openalex.org/W2760117852","https://openalex.org/W2125280635"],"abstract_inverted_index":{"Sound":[0],"event":[1,61,82,120,134],"localization":[2,62,121,131],"aims":[3],"at":[4],"estimating":[5],"the":[6,12,40,68,79],"positions":[7,83],"of":[8,42,94,130],"sound":[9,60,81,119],"sources":[10],"in":[11,26,67,128,149],"environment":[13],"with":[14,145],"respect":[15],"to":[16,49],"an":[17,91],"acoustic":[18],"receiver":[19],"(e.g.":[20],"a":[21,46,57],"microphone":[22],"array).":[23],"Recent":[24],"advances":[25],"this":[27,54,105],"domain":[28],"most":[29],"prominently":[30],"focused":[31],"on":[32,114,142],"utilizing":[33],"deep":[34,100],"recurrent":[35,51],"neural":[36,52],"networks.":[37],"Inspired":[38],"by":[39],"success":[41],"transformer":[43],"architectures":[44],"as":[45,86],"suitable":[47],"alternative":[48],"classical":[50],"networks,":[53],"paper":[55],"introduces":[56],"novel":[58],"transformer-based":[59],"framework,":[63],"where":[64],"temporal":[65],"dependencies":[66],"received":[69],"multi-channel":[70],"audio":[71],"signals":[72],"are":[73,84],"captured":[74],"via":[75],"self-attention":[76],"mechanisms.":[77],"Additionally,":[78],"estimated":[80],"represented":[85],"multivariate":[87],"Gaussian":[88],"variables,":[89],"yielding":[90],"additional":[92],"notion":[93],"uncertainty,":[95],"which":[96],"many":[97],"previously":[98],"proposed":[99],"learning-based":[101],"systems":[102,141],"designed":[103],"for":[104],"application":[106],"do":[107],"not":[108],"provide.":[109],"The":[110],"framework":[111],"is":[112],"evaluated":[113],"three":[115],"publicly":[116],"available":[117],"multi-source":[118],"datasets":[122,144],"and":[123,133],"compared":[124],"against":[125],"state-of-the-art":[126],"methods":[127],"terms":[129],"error":[132],"detection":[135],"accuracy.":[136],"It":[137],"outperforms":[138],"all":[139,143],"competing":[140],"statistical":[146],"significant":[147],"differences":[148],"performance.":[150]},"counts_by_year":[{"year":2024,"cited_by_count":1},{"year":2022,"cited_by_count":2},{"year":2021,"cited_by_count":2}],"updated_date":"2026-03-20T23:20:44.827607","created_date":"2025-10-10T00:00:00"}
