{"id":"https://openalex.org/W4404914582","doi":"https://doi.org/10.1109/access.2024.3510453","title":"w2v-SELD: A Sound Event Localization and Detection Framework for Self-Supervised Spatial Audio Pre-Training","display_name":"w2v-SELD: A Sound Event Localization and Detection Framework for Self-Supervised Spatial Audio Pre-Training","publication_year":2024,"publication_date":"2024-01-01","ids":{"openalex":"https://openalex.org/W4404914582","doi":"https://doi.org/10.1109/access.2024.3510453"},"language":"en","primary_location":{"id":"doi:10.1109/access.2024.3510453","is_oa":true,"landing_page_url":"https://doi.org/10.1109/access.2024.3510453","pdf_url":null,"source":{"id":"https://openalex.org/S2485537415","display_name":"IEEE Access","issn_l":"2169-3536","issn":["2169-3536"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Access","raw_type":"journal-article"},"type":"article","indexed_in":["crossref","doaj"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://doi.org/10.1109/access.2024.3510453","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5050538895","display_name":"Orlem Lima dos Santos","orcid":"https://orcid.org/0000-0002-3942-6418"},"institutions":[{"id":"https://openalex.org/I181391015","display_name":"Universidade Estadual de Campinas (UNICAMP)","ror":"https://ror.org/04wffgt70","country_code":"BR","type":"education","lineage":["https://openalex.org/I181391015"]}],"countries":["BR"],"is_corresponding":true,"raw_author_name":"Orlem Lima Dos Santos","raw_affiliation_strings":["Department of Computer Engineering and Industrial Automation, University of Campinas, Campinas, Brazil"],"raw_orcid":"https://orcid.org/0000-0002-3942-6418","affiliations":[{"raw_affiliation_string":"Department of Computer Engineering and Industrial Automation, University of Campinas, Campinas, Brazil","institution_ids":["https://openalex.org/I181391015"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5002311406","display_name":"Karen Rosero","orcid":"https://orcid.org/0000-0002-8118-4213"},"institutions":[{"id":"https://openalex.org/I162577319","display_name":"The University of Texas at Dallas","ror":"https://ror.org/049emcs32","country_code":"US","type":"education","lineage":["https://openalex.org/I162577319"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Karen Rosero","raw_affiliation_strings":["Department of Electrical and Computer Engineering, The University of Texas at Dallas, Richardson, TX, USA"],"raw_orcid":"https://orcid.org/0000-0002-8118-4213","affiliations":[{"raw_affiliation_string":"Department of Electrical and Computer Engineering, The University of Texas at Dallas, Richardson, TX, USA","institution_ids":["https://openalex.org/I162577319"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5030262162","display_name":"Bruno Masiero","orcid":"https://orcid.org/0000-0002-2246-4450"},"institutions":[{"id":"https://openalex.org/I181391015","display_name":"Universidade Estadual de Campinas (UNICAMP)","ror":"https://ror.org/04wffgt70","country_code":"BR","type":"education","lineage":["https://openalex.org/I181391015"]}],"countries":["BR"],"is_corresponding":false,"raw_author_name":"Bruno Masiero","raw_affiliation_strings":["Department of Computer Engineering and Industrial Automation, University of Campinas, Campinas, Brazil"],"raw_orcid":"https://orcid.org/0000-0002-2246-4450","affiliations":[{"raw_affiliation_string":"Department of Computer Engineering and Industrial Automation, University of Campinas, Campinas, Brazil","institution_ids":["https://openalex.org/I181391015"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5087970571","display_name":"Roberto Lotufo","orcid":"https://orcid.org/0000-0002-5652-0852"},"institutions":[{"id":"https://openalex.org/I181391015","display_name":"Universidade Estadual de Campinas (UNICAMP)","ror":"https://ror.org/04wffgt70","country_code":"BR","type":"education","lineage":["https://openalex.org/I181391015"]}],"countries":["BR"],"is_corresponding":false,"raw_author_name":"Roberto de Alencar Lotufo","raw_affiliation_strings":["Department of Computer Engineering and Industrial Automation, University of Campinas, Campinas, Brazil"],"raw_orcid":"https://orcid.org/0000-0002-5652-0852","affiliations":[{"raw_affiliation_string":"Department of Computer Engineering and Industrial Automation, University of Campinas, Campinas, Brazil","institution_ids":["https://openalex.org/I181391015"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5050538895"],"corresponding_institution_ids":["https://openalex.org/I181391015"],"apc_list":{"value":1850,"currency":"USD","value_usd":1850},"apc_paid":{"value":1850,"currency":"USD","value_usd":1850},"fwci":1.6197,"has_fulltext":false,"cited_by_count":5,"citation_normalized_percentile":{"value":0.85103785,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":97,"max":98},"biblio":{"volume":"12","issue":null,"first_page":"181553","last_page":"181569"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9993000030517578,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.996399998664856,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.779541015625},{"id":"https://openalex.org/keywords/training","display_name":"Training (meteorology)","score":0.5849807262420654},{"id":"https://openalex.org/keywords/event","display_name":"Event (particle physics)","score":0.5844365358352661},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.5841187238693237},{"id":"https://openalex.org/keywords/sound","display_name":"Sound (geography)","score":0.4924498498439789},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.47194212675094604},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.3299141526222229},{"id":"https://openalex.org/keywords/acoustics","display_name":"Acoustics","score":0.07756450772285461}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.779541015625},{"id":"https://openalex.org/C2777211547","wikidata":"https://www.wikidata.org/wiki/Q17141490","display_name":"Training (meteorology)","level":2,"score":0.5849807262420654},{"id":"https://openalex.org/C2779662365","wikidata":"https://www.wikidata.org/wiki/Q5416694","display_name":"Event (particle physics)","level":2,"score":0.5844365358352661},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.5841187238693237},{"id":"https://openalex.org/C203718221","wikidata":"https://www.wikidata.org/wiki/Q491713","display_name":"Sound (geography)","level":2,"score":0.4924498498439789},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.47194212675094604},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.3299141526222229},{"id":"https://openalex.org/C24890656","wikidata":"https://www.wikidata.org/wiki/Q82811","display_name":"Acoustics","level":1,"score":0.07756450772285461},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C153294291","wikidata":"https://www.wikidata.org/wiki/Q25261","display_name":"Meteorology","level":1,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1109/access.2024.3510453","is_oa":true,"landing_page_url":"https://doi.org/10.1109/access.2024.3510453","pdf_url":null,"source":{"id":"https://openalex.org/S2485537415","display_name":"IEEE Access","issn_l":"2169-3536","issn":["2169-3536"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Access","raw_type":"journal-article"},{"id":"pmh:oai:doaj.org/article:3a4d9e8f878e43c28d86ff4a30d798e1","is_oa":true,"landing_page_url":"https://doaj.org/article/3a4d9e8f878e43c28d86ff4a30d798e1","pdf_url":null,"source":{"id":"https://openalex.org/S4306401280","display_name":"DOAJ (DOAJ: Directory of Open Access Journals)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by-sa","license_id":"https://openalex.org/licenses/cc-by-sa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"IEEE Access, Vol 12, Pp 181553-181569 (2024)","raw_type":"article"}],"best_oa_location":{"id":"doi:10.1109/access.2024.3510453","is_oa":true,"landing_page_url":"https://doi.org/10.1109/access.2024.3510453","pdf_url":null,"source":{"id":"https://openalex.org/S2485537415","display_name":"IEEE Access","issn_l":"2169-3536","issn":["2169-3536"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Access","raw_type":"journal-article"},"sustainable_development_goals":[],"awards":[],"funders":[{"id":"https://openalex.org/F4320321091","display_name":"Coordena\u00e7\u00e3o de Aperfei\u00e7oamento de Pessoal de N\u00edvel Superior","ror":"https://ror.org/00x0ma614"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":45,"referenced_works":["https://openalex.org/W1494198834","https://openalex.org/W2038484192","https://openalex.org/W2127141656","https://openalex.org/W2194775991","https://openalex.org/W2408239454","https://openalex.org/W2531409750","https://openalex.org/W2593116425","https://openalex.org/W2608422710","https://openalex.org/W2810934215","https://openalex.org/W2896457183","https://openalex.org/W2936774411","https://openalex.org/W2982382207","https://openalex.org/W2982429715","https://openalex.org/W3009685637","https://openalex.org/W3095717210","https://openalex.org/W3097777922","https://openalex.org/W3116084609","https://openalex.org/W3144223439","https://openalex.org/W3163193264","https://openalex.org/W3198730349","https://openalex.org/W3206189675","https://openalex.org/W3212486120","https://openalex.org/W3215259255","https://openalex.org/W4205689591","https://openalex.org/W4220707304","https://openalex.org/W4221141618","https://openalex.org/W4225270933","https://openalex.org/W4236344233","https://openalex.org/W4287766186","https://openalex.org/W4313419048","https://openalex.org/W4324116353","https://openalex.org/W4372260505","https://openalex.org/W4372263497","https://openalex.org/W4375869379","https://openalex.org/W4380032323","https://openalex.org/W4385245566","https://openalex.org/W4392885584","https://openalex.org/W4392903703","https://openalex.org/W4394783925","https://openalex.org/W4395029683","https://openalex.org/W6753018729","https://openalex.org/W6755207826","https://openalex.org/W6779923105","https://openalex.org/W6780218876","https://openalex.org/W6804656293"],"related_works":["https://openalex.org/W230091440","https://openalex.org/W2233261550","https://openalex.org/W2810751659","https://openalex.org/W258997015","https://openalex.org/W2997094352","https://openalex.org/W2909726438","https://openalex.org/W2067046791","https://openalex.org/W3216976533","https://openalex.org/W100620283","https://openalex.org/W2495260952"],"abstract_inverted_index":{"Sound":[0],"Event":[1],"Localization":[2],"and":[3,20,27,49,127,132,143,181],"Detection":[4,126],"(SELD)":[5],"is":[6],"a":[7,63,92,110,137,144],"critical":[8],"challenge":[9],"in":[10,32,85,175],"various":[11],"industrial":[12,176],"applications,":[13],"such":[14],"as":[15],"autonomous":[16],"systems,":[17],"smart":[18],"cities,":[19],"audio":[21,44,83,100],"surveillance,":[22],"which":[23,46],"require":[24],"accurate":[25],"identification":[26],"localization":[28],"of":[29,129,184],"sound":[30,75],"events":[31],"complex":[33],"environments.":[34],"Traditional":[35],"supervised":[36,162],"approaches":[37],"heavily":[38],"rely":[39],"on":[40,96,125,147,172],"large,":[41,97],"annotated":[42],"multichannel":[43],"datasets,":[45],"are":[47,188],"expensive":[48],"time-consuming":[50],"to":[51,72,102,152,167],"produce.":[52],"This":[53],"paper":[54],"addresses":[55],"this":[56],"limitation":[57],"by":[58,107],"introducing":[59],"the":[60,68,170],"w2v-SELD":[61,120,156,186],"architecture,":[62],"self-supervised":[64],"model":[65,90,157,187],"adapted":[66],"from":[67,79],"wav2vec":[69],"2.0":[70],"framework":[71],"learn":[73],"effective":[74],"event":[76],"representations":[77],"directly":[78],"raw,":[80],"unlabeled":[81,98],"3D":[82,99],"data":[84,174],"ambisonics":[86],"format.":[87],"The":[88,155,179],"proposed":[89],"follows":[91],"two-stage":[93],"process:":[94],"pre-training":[95],"datasets":[101],"capture":[103],"high-level":[104],"features,":[105],"followed":[106],"fine-tuning":[108],"with":[109,150,160],"smaller,":[111],"labeled":[112,173],"SELD":[113,177],"dataset.":[114],"Experimental":[115],"results":[116],"show":[117],"that":[118],"our":[119,185],"method":[121],"outperforms":[122],"baseline":[123,153],"models":[124],"Classification":[128],"Acoustic":[130],"Scenes":[131],"Events":[133],"(DCASE)":[134],"challenges,":[135],"achieving":[136],"66%":[138],"improvement":[139,146],"for":[140],"DCASE":[141,148],"TAU-2019":[142],"57%":[145],"TAU-2020":[149],"respect":[151],"systems.":[154],"performs":[158],"competitively":[159],"state-of-the-art":[161],"methods,":[163],"highlighting":[164],"its":[165],"potential":[166],"significantly":[168],"reduce":[169],"dependency":[171],"applications.":[178],"code":[180],"pre-trained":[182],"parameters":[183],"available":[189],"online.":[190]},"counts_by_year":[{"year":2025,"cited_by_count":5}],"updated_date":"2026-05-06T08:25:59.206177","created_date":"2025-10-10T00:00:00"}
