{"id":"https://openalex.org/W4391496571","doi":"https://doi.org/10.1109/wcsp58612.2023.10404673","title":"Resnet-Conformer Network using Multi-Scale Channel Attention for Sound Event Localization and Detection in Real Scenes","display_name":"Resnet-Conformer Network using Multi-Scale Channel Attention for Sound Event Localization and Detection in Real Scenes","publication_year":2023,"publication_date":"2023-11-02","ids":{"openalex":"https://openalex.org/W4391496571","doi":"https://doi.org/10.1109/wcsp58612.2023.10404673"},"language":"en","primary_location":{"id":"doi:10.1109/wcsp58612.2023.10404673","is_oa":false,"landing_page_url":"http://dx.doi.org/10.1109/wcsp58612.2023.10404673","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2023 International Conference on Wireless Communications and Signal Processing (WCSP)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5100584008","display_name":"Xue Li-hua","orcid":null},"institutions":[{"id":"https://openalex.org/I10535382","display_name":"Chongqing University of Posts and Telecommunications","ror":"https://ror.org/03dgaqz26","country_code":"CN","type":"education","lineage":["https://openalex.org/I10535382"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Lihua Xue","raw_affiliation_strings":["Chongqing University of Posts and Telecommunications,School of Communication and Information Engineering,Chongqing,China","School of Communication and Information Engineering, Chongqing University of Posts and Telecommunications, Chongqing, China"],"affiliations":[{"raw_affiliation_string":"Chongqing University of Posts and Telecommunications,School of Communication and Information Engineering,Chongqing,China","institution_ids":["https://openalex.org/I10535382"]},{"raw_affiliation_string":"School of Communication and Information Engineering, Chongqing University of Posts and Telecommunications, Chongqing, China","institution_ids":["https://openalex.org/I10535382"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5102001094","display_name":"Hongqing Liu","orcid":"https://orcid.org/0000-0002-2069-0390"},"institutions":[{"id":"https://openalex.org/I10535382","display_name":"Chongqing University of Posts and Telecommunications","ror":"https://ror.org/03dgaqz26","country_code":"CN","type":"education","lineage":["https://openalex.org/I10535382"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Hongqing Liu","raw_affiliation_strings":["Chongqing University of Posts and Telecommunications,Intelligent Speech and Audio Research Lab.,Chongqing,China","Intelligent Speech and Audio Research Lab., Chongqing University of Posts and Telecommunications, Chongqing, China"],"affiliations":[{"raw_affiliation_string":"Chongqing University of Posts and Telecommunications,Intelligent Speech and Audio Research Lab.,Chongqing,China","institution_ids":["https://openalex.org/I10535382"]},{"raw_affiliation_string":"Intelligent Speech and Audio Research Lab., Chongqing University of Posts and Telecommunications, Chongqing, China","institution_ids":["https://openalex.org/I10535382"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5005083930","display_name":"Yi Zhou","orcid":"https://orcid.org/0000-0001-7445-226X"},"institutions":[{"id":"https://openalex.org/I10535382","display_name":"Chongqing University of Posts and Telecommunications","ror":"https://ror.org/03dgaqz26","country_code":"CN","type":"education","lineage":["https://openalex.org/I10535382"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yi Zhou","raw_affiliation_strings":["Chongqing University of Posts and Telecommunications,Intelligent Speech and Audio Research Lab.,Chongqing,China","Intelligent Speech and Audio Research Lab., Chongqing University of Posts and Telecommunications, Chongqing, China"],"affiliations":[{"raw_affiliation_string":"Chongqing University of Posts and Telecommunications,Intelligent Speech and Audio Research Lab.,Chongqing,China","institution_ids":["https://openalex.org/I10535382"]},{"raw_affiliation_string":"Intelligent Speech and Audio Research Lab., Chongqing University of Posts and Telecommunications, Chongqing, China","institution_ids":["https://openalex.org/I10535382"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5087344550","display_name":"Lu Gan","orcid":"https://orcid.org/0000-0002-1811-1011"},"institutions":[{"id":"https://openalex.org/I59433898","display_name":"Brunel University of London","ror":"https://ror.org/00dn4t376","country_code":"GB","type":"education","lineage":["https://openalex.org/I124357947","https://openalex.org/I59433898"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Lu Gan","raw_affiliation_strings":["Brunel University,College of Engineering, Design and Physical Science,London,U.K","College of Engineering, Design and Physical Science, Brunel University, London, U.K"],"affiliations":[{"raw_affiliation_string":"Brunel University,College of Engineering, Design and Physical Science,London,U.K","institution_ids":[]},{"raw_affiliation_string":"College of Engineering, Design and Physical Science, Brunel University, London, U.K","institution_ids":["https://openalex.org/I59433898"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5100584008"],"corresponding_institution_ids":["https://openalex.org/I10535382"],"apc_list":null,"apc_paid":null,"fwci":0.4075,"has_fulltext":false,"cited_by_count":2,"citation_normalized_percentile":{"value":0.61020344,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":94,"max":96},"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.998199999332428,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.998199999332428,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9916999936103821,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11349","display_name":"Music Technology and Sound Studies","score":0.9606999754905701,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.626916766166687},{"id":"https://openalex.org/keywords/scale","display_name":"Scale (ratio)","score":0.5874601602554321},{"id":"https://openalex.org/keywords/event","display_name":"Event (particle physics)","score":0.575806736946106},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5068140625953674},{"id":"https://openalex.org/keywords/sound","display_name":"Sound (geography)","score":0.4973643124103546},{"id":"https://openalex.org/keywords/channel","display_name":"Channel (broadcasting)","score":0.4724355638027191},{"id":"https://openalex.org/keywords/computer-vision","display_name":"Computer vision","score":0.3587143123149872},{"id":"https://openalex.org/keywords/acoustics","display_name":"Acoustics","score":0.19218701124191284},{"id":"https://openalex.org/keywords/geography","display_name":"Geography","score":0.1303030252456665},{"id":"https://openalex.org/keywords/cartography","display_name":"Cartography","score":0.1168232262134552},{"id":"https://openalex.org/keywords/computer-network","display_name":"Computer network","score":0.10823240876197815},{"id":"https://openalex.org/keywords/physics","display_name":"Physics","score":0.07720372080802917}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.626916766166687},{"id":"https://openalex.org/C2778755073","wikidata":"https://www.wikidata.org/wiki/Q10858537","display_name":"Scale (ratio)","level":2,"score":0.5874601602554321},{"id":"https://openalex.org/C2779662365","wikidata":"https://www.wikidata.org/wiki/Q5416694","display_name":"Event (particle physics)","level":2,"score":0.575806736946106},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5068140625953674},{"id":"https://openalex.org/C203718221","wikidata":"https://www.wikidata.org/wiki/Q491713","display_name":"Sound (geography)","level":2,"score":0.4973643124103546},{"id":"https://openalex.org/C127162648","wikidata":"https://www.wikidata.org/wiki/Q16858953","display_name":"Channel (broadcasting)","level":2,"score":0.4724355638027191},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.3587143123149872},{"id":"https://openalex.org/C24890656","wikidata":"https://www.wikidata.org/wiki/Q82811","display_name":"Acoustics","level":1,"score":0.19218701124191284},{"id":"https://openalex.org/C205649164","wikidata":"https://www.wikidata.org/wiki/Q1071","display_name":"Geography","level":0,"score":0.1303030252456665},{"id":"https://openalex.org/C58640448","wikidata":"https://www.wikidata.org/wiki/Q42515","display_name":"Cartography","level":1,"score":0.1168232262134552},{"id":"https://openalex.org/C31258907","wikidata":"https://www.wikidata.org/wiki/Q1301371","display_name":"Computer network","level":1,"score":0.10823240876197815},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.07720372080802917},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/wcsp58612.2023.10404673","is_oa":false,"landing_page_url":"http://dx.doi.org/10.1109/wcsp58612.2023.10404673","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2023 International Conference on Wireless Communications and Signal Processing (WCSP)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"score":0.6899999976158142,"id":"https://metadata.un.org/sdg/10","display_name":"Reduced inequalities"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":29,"referenced_works":["https://openalex.org/W2518102674","https://openalex.org/W2593116425","https://openalex.org/W2794506738","https://openalex.org/W2810934215","https://openalex.org/W2883787264","https://openalex.org/W2994088087","https://openalex.org/W2998139081","https://openalex.org/W3083274258","https://openalex.org/W3094897602","https://openalex.org/W3117314925","https://openalex.org/W3163193264","https://openalex.org/W3163881933","https://openalex.org/W3206329344","https://openalex.org/W3217664629","https://openalex.org/W4205689591","https://openalex.org/W4221141618","https://openalex.org/W4225293011","https://openalex.org/W4282031641","https://openalex.org/W4324116353","https://openalex.org/W4366999556","https://openalex.org/W4372263497","https://openalex.org/W4385245566","https://openalex.org/W4393710905","https://openalex.org/W6739901393","https://openalex.org/W6758684365","https://openalex.org/W6770979763","https://openalex.org/W6838853441","https://openalex.org/W6852063634","https://openalex.org/W6862861010"],"related_works":["https://openalex.org/W2909726438","https://openalex.org/W2067046791","https://openalex.org/W2909888262","https://openalex.org/W2056769785","https://openalex.org/W2025747832","https://openalex.org/W3020957235","https://openalex.org/W611303608","https://openalex.org/W4313595395","https://openalex.org/W2312783472","https://openalex.org/W2909748163"],"abstract_inverted_index":{"Sound":[0],"Event":[1],"Localization":[2],"and":[3,11,37,81,110,115,127,164,185],"Detection":[4],"(SELD)":[5],"aims":[6],"at":[7],"detecting":[8],"the":[9,15,32,38,46,73,76,79,83,89,105,117,120,125,128,153,167,176,180,187,191],"class":[10],"activity":[12],"time":[13],"of":[14,23,28,34,40,48,78,119,147,155,179],"sound":[16,42],"events,":[17],"while":[18],"estimating":[19],"their":[20],"corresponding":[21],"direction":[22],"arrival":[24],"(DOA).":[25],"The":[26],"difficulties":[27],"SELD":[29],"lie":[30],"in":[31,50,175,190],"change":[33],"acoustic":[35],"scenes":[36],"presence":[39],"overlapping":[41],"events.":[43],"To":[44,67,151],"improve":[45],"performance":[47],"SELD,":[49],"this":[51],"work,":[52],"we":[53,71,97,123,142,158,170],"propose":[54],"a":[55],"Resnet-Conformer":[56],"network":[57],"based":[58],"on":[59],"multi-scale":[60,84],"channel":[61,85,161],"attention":[62,86],"(MSCA-RCnet)":[63],"to":[64,88,92,103,166],"conduct":[65],"SELD.":[66],"that":[68],"end,":[69],"first,":[70],"take":[72],"RCnet":[74],"as":[75],"backbone":[77],"model":[80],"add":[82],"(MSCA)":[87],"Resnet":[90],"block":[91],"capture":[93],"channel-level":[94],"dependencies.":[95],"Second,":[96],"utilize":[98],"attentive":[99],"statistics":[100],"pooling":[101],"(ASP)":[102],"resolve":[104],"mismatch":[106],"between":[107],"label":[108],"resolution":[109,114],"frame-level":[111],"feature":[112],"temporal":[113],"enhance":[116],"discriminability":[118],"features.":[121],"Finally,":[122,169],"integrate":[124],"MSCA-RCnet":[126],"Event-Independent":[129],"Network":[130],"V2":[131],"(EINV2)":[132],"by":[133],"multi-ACCDOA":[134,139],"representation":[135],"into":[136],"an":[137,144],"EINV2-based":[138],"network.":[140],"Subsequently,":[141],"perform":[143],"average":[145],"ensemble":[146],"these":[148],"two":[149],"models.":[150],"address":[152],"shortage":[154],"training":[156],"data,":[157],"employ":[159],"audio":[160],"swapping":[162],"(ACS)":[163],"AugMix":[165],"dataset.":[168],"evaluate":[171],"our":[172],"proposed":[173],"systems":[174],"Audio-only":[177],"Track":[178],"DCASE":[181],"2023":[182],"Task":[183],"3":[184],"rank":[186],"second":[188],"place":[189],"competition.":[192]},"counts_by_year":[{"year":2024,"cited_by_count":2}],"updated_date":"2025-12-25T23:11:45.687758","created_date":"2025-10-10T00:00:00"}
