{"id":"https://openalex.org/W4221141618","doi":"https://doi.org/10.1109/icassp43922.2022.9747283","title":"A Track-Wise Ensemble Event Independent Network for Polyphonic Sound Event Localization and Detection","display_name":"A Track-Wise Ensemble Event Independent Network for Polyphonic Sound Event Localization and Detection","publication_year":2022,"publication_date":"2022-04-27","ids":{"openalex":"https://openalex.org/W4221141618","doi":"https://doi.org/10.1109/icassp43922.2022.9747283"},"language":"en","primary_location":{"id":"doi:10.1109/icassp43922.2022.9747283","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp43922.2022.9747283","pdf_url":null,"source":{"id":"https://openalex.org/S4363607702","display_name":"ICASSP 2022 - 2022 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2022 - 2022 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5047649448","display_name":"Jinbo Hu","orcid":"https://orcid.org/0000-0003-3537-0207"},"institutions":[{"id":"https://openalex.org/I4210099069","display_name":"Institute of Acoustics","ror":"https://ror.org/00v8rqv75","country_code":"CN","type":"facility","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210099069"]},{"id":"https://openalex.org/I4210165038","display_name":"University of Chinese Academy of Sciences","ror":"https://ror.org/05qbk4x57","country_code":"CN","type":"education","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210165038"]},{"id":"https://openalex.org/I19820366","display_name":"Chinese Academy of Sciences","ror":"https://ror.org/034t30j35","country_code":"CN","type":"funder","lineage":["https://openalex.org/I19820366"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Jinbo Hu","raw_affiliation_strings":["Chinese Academy of Sciences,Key Laboratory of Noise and Vibration Research, Institute of Acoustics,Beijing,China","University of Chinese Academy of Sciences, Beijing, China","Key Laboratory of Noise and Vibration Research, Institute of Acoustics, Chinese Academy of Sciences, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Chinese Academy of Sciences,Key Laboratory of Noise and Vibration Research, Institute of Acoustics,Beijing,China","institution_ids":["https://openalex.org/I4210099069","https://openalex.org/I19820366"]},{"raw_affiliation_string":"University of Chinese Academy of Sciences, Beijing, China","institution_ids":["https://openalex.org/I4210165038"]},{"raw_affiliation_string":"Key Laboratory of Noise and Vibration Research, Institute of Acoustics, Chinese Academy of Sciences, Beijing, China","institution_ids":["https://openalex.org/I4210099069","https://openalex.org/I19820366"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101590005","display_name":"Yin Cao","orcid":"https://orcid.org/0000-0001-9086-7027"},"institutions":[{"id":"https://openalex.org/I28290843","display_name":"University of Surrey","ror":"https://ror.org/00ks66431","country_code":"GB","type":"education","lineage":["https://openalex.org/I28290843"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Yin Cao","raw_affiliation_strings":["University of Surrey,Centre for Vision, Speech and Signal Processing (CVSSP),UK","Centre for Vision, Speech and Signal Processing (CVSSP), University of Surrey, UK"],"affiliations":[{"raw_affiliation_string":"University of Surrey,Centre for Vision, Speech and Signal Processing (CVSSP),UK","institution_ids":["https://openalex.org/I28290843"]},{"raw_affiliation_string":"Centre for Vision, Speech and Signal Processing (CVSSP), University of Surrey, UK","institution_ids":["https://openalex.org/I28290843"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100669887","display_name":"Ming Wu","orcid":"https://orcid.org/0000-0002-3582-4881"},"institutions":[{"id":"https://openalex.org/I19820366","display_name":"Chinese Academy of Sciences","ror":"https://ror.org/034t30j35","country_code":"CN","type":"funder","lineage":["https://openalex.org/I19820366"]},{"id":"https://openalex.org/I4210099069","display_name":"Institute of Acoustics","ror":"https://ror.org/00v8rqv75","country_code":"CN","type":"facility","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210099069"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Ming Wu","raw_affiliation_strings":["Chinese Academy of Sciences,Key Laboratory of Noise and Vibration Research, Institute of Acoustics,Beijing,China","Key Laboratory of Noise and Vibration Research, Institute of Acoustics, Chinese Academy of Sciences, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Chinese Academy of Sciences,Key Laboratory of Noise and Vibration Research, Institute of Acoustics,Beijing,China","institution_ids":["https://openalex.org/I4210099069","https://openalex.org/I19820366"]},{"raw_affiliation_string":"Key Laboratory of Noise and Vibration Research, Institute of Acoustics, Chinese Academy of Sciences, Beijing, China","institution_ids":["https://openalex.org/I4210099069","https://openalex.org/I19820366"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5072482416","display_name":"Qiuqiang Kong","orcid":"https://orcid.org/0000-0003-2864-0475"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Qiuqiang Kong","raw_affiliation_strings":["ByteDance,Shanghai,China","ByteDance, Shanghai, China"],"affiliations":[{"raw_affiliation_string":"ByteDance,Shanghai,China","institution_ids":[]},{"raw_affiliation_string":"ByteDance, Shanghai, China","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5088185643","display_name":"Feiran Yang","orcid":"https://orcid.org/0000-0002-1734-3785"},"institutions":[{"id":"https://openalex.org/I19820366","display_name":"Chinese Academy of Sciences","ror":"https://ror.org/034t30j35","country_code":"CN","type":"funder","lineage":["https://openalex.org/I19820366"]},{"id":"https://openalex.org/I4210099069","display_name":"Institute of Acoustics","ror":"https://ror.org/00v8rqv75","country_code":"CN","type":"facility","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210099069"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Feiran Yang","raw_affiliation_strings":["Chinese Academy of Sciences,Key Laboratory of Noise and Vibration Research, Institute of Acoustics,Beijing,China","Key Laboratory of Noise and Vibration Research, Institute of Acoustics, Chinese Academy of Sciences, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Chinese Academy of Sciences,Key Laboratory of Noise and Vibration Research, Institute of Acoustics,Beijing,China","institution_ids":["https://openalex.org/I4210099069","https://openalex.org/I19820366"]},{"raw_affiliation_string":"Key Laboratory of Noise and Vibration Research, Institute of Acoustics, Chinese Academy of Sciences, Beijing, China","institution_ids":["https://openalex.org/I4210099069","https://openalex.org/I19820366"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5066967599","display_name":"Mark D. Plumbley","orcid":"https://orcid.org/0000-0002-9708-1075"},"institutions":[{"id":"https://openalex.org/I28290843","display_name":"University of Surrey","ror":"https://ror.org/00ks66431","country_code":"GB","type":"education","lineage":["https://openalex.org/I28290843"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Mark D. Plumbley","raw_affiliation_strings":["University of Surrey,Centre for Vision, Speech and Signal Processing (CVSSP),UK","Centre for Vision, Speech and Signal Processing (CVSSP), University of Surrey, UK"],"affiliations":[{"raw_affiliation_string":"University of Surrey,Centre for Vision, Speech and Signal Processing (CVSSP),UK","institution_ids":["https://openalex.org/I28290843"]},{"raw_affiliation_string":"Centre for Vision, Speech and Signal Processing (CVSSP), University of Surrey, UK","institution_ids":["https://openalex.org/I28290843"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5101801967","display_name":"Jun Yang","orcid":"https://orcid.org/0000-0002-4901-8530"},"institutions":[{"id":"https://openalex.org/I19820366","display_name":"Chinese Academy of Sciences","ror":"https://ror.org/034t30j35","country_code":"CN","type":"funder","lineage":["https://openalex.org/I19820366"]},{"id":"https://openalex.org/I4210165038","display_name":"University of Chinese Academy of Sciences","ror":"https://ror.org/05qbk4x57","country_code":"CN","type":"education","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210165038"]},{"id":"https://openalex.org/I4210099069","display_name":"Institute of Acoustics","ror":"https://ror.org/00v8rqv75","country_code":"CN","type":"facility","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210099069"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jun Yang","raw_affiliation_strings":["Chinese Academy of Sciences,Key Laboratory of Noise and Vibration Research, Institute of Acoustics,Beijing,China","Key Laboratory of Noise and Vibration Research, Institute of Acoustics, Chinese Academy of Sciences, Beijing, China","University of Chinese Academy of Sciences, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Chinese Academy of Sciences,Key Laboratory of Noise and Vibration Research, Institute of Acoustics,Beijing,China","institution_ids":["https://openalex.org/I4210099069","https://openalex.org/I19820366"]},{"raw_affiliation_string":"Key Laboratory of Noise and Vibration Research, Institute of Acoustics, Chinese Academy of Sciences, Beijing, China","institution_ids":["https://openalex.org/I4210099069","https://openalex.org/I19820366"]},{"raw_affiliation_string":"University of Chinese Academy of Sciences, Beijing, China","institution_ids":["https://openalex.org/I4210165038"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":7,"corresponding_author_ids":["https://openalex.org/A5047649448"],"corresponding_institution_ids":["https://openalex.org/I19820366","https://openalex.org/I4210099069","https://openalex.org/I4210165038"],"apc_list":null,"apc_paid":null,"fwci":2.2092,"has_fulltext":false,"cited_by_count":19,"citation_normalized_percentile":{"value":0.90236967,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":97,"max":99},"biblio":{"volume":null,"issue":null,"first_page":"9196","last_page":"9200"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9868000149726868,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/spectrogram","display_name":"Spectrogram","score":0.7808915376663208},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7193475961685181},{"id":"https://openalex.org/keywords/event","display_name":"Event (particle physics)","score":0.6702573895454407},{"id":"https://openalex.org/keywords/code","display_name":"Code (set theory)","score":0.49378499388694763},{"id":"https://openalex.org/keywords/ranking","display_name":"Ranking (information retrieval)","score":0.49320119619369507},{"id":"https://openalex.org/keywords/permutation","display_name":"Permutation (music)","score":0.4899424910545349},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.44501131772994995},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.41133877635002136},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.33248648047447205}],"concepts":[{"id":"https://openalex.org/C45273575","wikidata":"https://www.wikidata.org/wiki/Q578970","display_name":"Spectrogram","level":2,"score":0.7808915376663208},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7193475961685181},{"id":"https://openalex.org/C2779662365","wikidata":"https://www.wikidata.org/wiki/Q5416694","display_name":"Event (particle physics)","level":2,"score":0.6702573895454407},{"id":"https://openalex.org/C2776760102","wikidata":"https://www.wikidata.org/wiki/Q5139990","display_name":"Code (set theory)","level":3,"score":0.49378499388694763},{"id":"https://openalex.org/C189430467","wikidata":"https://www.wikidata.org/wiki/Q7293293","display_name":"Ranking (information retrieval)","level":2,"score":0.49320119619369507},{"id":"https://openalex.org/C21308566","wikidata":"https://www.wikidata.org/wiki/Q7169365","display_name":"Permutation (music)","level":2,"score":0.4899424910545349},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.44501131772994995},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.41133877635002136},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.33248648047447205},{"id":"https://openalex.org/C24890656","wikidata":"https://www.wikidata.org/wiki/Q82811","display_name":"Acoustics","level":1,"score":0.0},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1109/icassp43922.2022.9747283","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp43922.2022.9747283","pdf_url":null,"source":{"id":"https://openalex.org/S4363607702","display_name":"ICASSP 2022 - 2022 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2022 - 2022 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"},{"id":"pmh:oai:alma.44SUR_INST:11158470170002346","is_oa":false,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S4210197018","display_name":"View","issn_l":"2688-268X","issn":["2688-268X","2688-3988"],"is_oa":false,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310320595","host_organization_name":"Wiley","host_organization_lineage":["https://openalex.org/P4310320595"],"host_organization_lineage_names":["Wiley"],"type":"journal"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":""}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G8857457899","display_name":null,"funder_award_id":"EP/T019751/1","funder_id":"https://openalex.org/F4320334627","funder_display_name":"Engineering and Physical Sciences Research Council"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"},{"id":"https://openalex.org/F4320321133","display_name":"Chinese Academy of Sciences","ror":"https://ror.org/034t30j35"},{"id":"https://openalex.org/F4320334627","display_name":"Engineering and Physical Sciences Research Council","ror":"https://ror.org/0439y7842"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":30,"referenced_works":["https://openalex.org/W28412257","https://openalex.org/W2765407302","https://openalex.org/W2810934215","https://openalex.org/W2917254586","https://openalex.org/W2936774411","https://openalex.org/W2942551338","https://openalex.org/W2963446712","https://openalex.org/W2982382207","https://openalex.org/W2982680886","https://openalex.org/W2994088087","https://openalex.org/W2998139081","https://openalex.org/W3005680577","https://openalex.org/W3091667472","https://openalex.org/W3094509890","https://openalex.org/W3097777922","https://openalex.org/W3098454764","https://openalex.org/W3120252178","https://openalex.org/W3163881933","https://openalex.org/W3176079376","https://openalex.org/W3177143793","https://openalex.org/W3203177955","https://openalex.org/W4225270933","https://openalex.org/W6745136726","https://openalex.org/W6770979763","https://openalex.org/W6774314701","https://openalex.org/W6784117923","https://openalex.org/W6788349323","https://openalex.org/W6797253786","https://openalex.org/W6797669297","https://openalex.org/W6802044352"],"related_works":["https://openalex.org/W2530685530","https://openalex.org/W2011227383","https://openalex.org/W4375868962","https://openalex.org/W2088854863","https://openalex.org/W1976719989","https://openalex.org/W2942893872","https://openalex.org/W2065606036","https://openalex.org/W3179495260","https://openalex.org/W3127543252","https://openalex.org/W4298036227"],"abstract_inverted_index":{"Polyphonic":[0],"sound":[1,12],"event":[2,27],"localization":[3],"and":[4,18,50,56,115,135],"detection":[5],"(SELD)":[6],"aims":[7],"at":[8],"detecting":[9],"types":[10],"of":[11,99,102,131],"events":[13],"with":[14,30,63,141],"corresponding":[15],"temporal":[16],"activities":[17],"spatial":[19],"locations.":[20],"In":[21],"this":[22],"paper,":[23],"a":[24,31,142],"trackwise":[25],"ensemble":[26,61,72],"independent":[28],"network":[29],"novel":[32],"data":[33,88,93,104],"augmentation":[34,89,94,105],"method":[35,108,127],"is":[36,41,51,67,150],"proposed.":[37],"The":[38,59,87,107],"proposed":[39,46,68,126],"model":[40,62,73],"based":[42],"on":[43],"our":[44,125],"previous":[45],"Event-Independent":[47],"Network":[48],"V2":[49],"extended":[52],"by":[53],"conformer":[54],"blocks":[55],"dense":[57],"blocks.":[58],"track-wise":[60,64,76],"output":[65,77],"format":[66,78],"to":[69,145],"solve":[70],"an":[71],"problem":[74],"for":[75,120],"that":[79],"track":[80],"permutation":[81],"may":[82],"occur":[83],"among":[84],"different":[85,121],"models.":[86,122],"approach":[90],"contains":[91],"several":[92,103],"chains,":[95],"which":[96],"are":[97],"composed":[98],"random":[100],"combinations":[101],"operations.":[106],"also":[109],"utilizes":[110],"log-mel":[111],"spectrograms,":[112],"intensity":[113],"vectors,":[114],"Spatial":[116],"Cues-Augmented":[117],"Log-Spectrogram":[118],"(SALSA)":[119],"We":[123],"evaluate":[124],"in":[128],"the":[129,132,137],"Task":[130],"L3DAS22":[133],"challenge":[134],"obtain":[136],"top":[138],"ranking":[139],"solution":[140],"location-dependent":[143],"F-score":[144],"be":[146],"0.699.":[147],"Source":[148],"code":[149],"released":[151],"<sup":[152],"xmlns:mml=\"http://www.w3.org/1998/Math/MathML\"":[153],"xmlns:xlink=\"http://www.w3.org/1999/xlink\">1</sup>":[154],".":[155]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":4},{"year":2024,"cited_by_count":10},{"year":2023,"cited_by_count":4}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
