{"id":"https://openalex.org/W7125973849","doi":"https://doi.org/10.1109/smc58881.2025.11343608","title":"ACIL-SED: An Acoustic Clustering and Imbalance Learning Method for Sound Event Detection","display_name":"ACIL-SED: An Acoustic Clustering and Imbalance Learning Method for Sound Event Detection","publication_year":2025,"publication_date":"2025-10-05","ids":{"openalex":"https://openalex.org/W7125973849","doi":"https://doi.org/10.1109/smc58881.2025.11343608"},"language":null,"primary_location":{"id":"doi:10.1109/smc58881.2025.11343608","is_oa":false,"landing_page_url":"https://doi.org/10.1109/smc58881.2025.11343608","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 IEEE International Conference on Systems, Man, and Cybernetics (SMC)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5025153228","display_name":"Zhichao Chen","orcid":"https://orcid.org/0000-0002-1362-7940"},"institutions":[{"id":"https://openalex.org/I101479585","display_name":"South China Agricultural University","ror":"https://ror.org/05v9jqt67","country_code":"CN","type":"education","lineage":["https://openalex.org/I101479585"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Zhichao Chen","raw_affiliation_strings":["South China Agricultural University,College of Mathematics and Informatics,Guangzhou,China,510642"],"affiliations":[{"raw_affiliation_string":"South China Agricultural University,College of Mathematics and Informatics,Guangzhou,China,510642","institution_ids":["https://openalex.org/I101479585"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5029377169","display_name":"Cankun Zhong","orcid":"https://orcid.org/0000-0002-4271-6483"},"institutions":[{"id":"https://openalex.org/I101479585","display_name":"South China Agricultural University","ror":"https://ror.org/05v9jqt67","country_code":"CN","type":"education","lineage":["https://openalex.org/I101479585"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Cankun Zhong","raw_affiliation_strings":["South China Agricultural University,College of Mathematics and Informatics,Guangzhou,China,510642"],"affiliations":[{"raw_affiliation_string":"South China Agricultural University,College of Mathematics and Informatics,Guangzhou,China,510642","institution_ids":["https://openalex.org/I101479585"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100604860","display_name":"Yun Liang","orcid":"https://orcid.org/0000-0002-9076-7998"},"institutions":[{"id":"https://openalex.org/I101479585","display_name":"South China Agricultural University","ror":"https://ror.org/05v9jqt67","country_code":"CN","type":"education","lineage":["https://openalex.org/I101479585"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yun Liang","raw_affiliation_strings":["South China Agricultural University,College of Mathematics and Informatics,Guangzhou,China,510642"],"affiliations":[{"raw_affiliation_string":"South China Agricultural University,College of Mathematics and Informatics,Guangzhou,China,510642","institution_ids":["https://openalex.org/I101479585"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5124134355","display_name":"Tang Luo","orcid":null},"institutions":[{"id":"https://openalex.org/I101479585","display_name":"South China Agricultural University","ror":"https://ror.org/05v9jqt67","country_code":"CN","type":"education","lineage":["https://openalex.org/I101479585"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Tang Luo","raw_affiliation_strings":["South China Agricultural University,College of Mathematics and Informatics,Guangzhou,China,510642"],"affiliations":[{"raw_affiliation_string":"South China Agricultural University,College of Mathematics and Informatics,Guangzhou,China,510642","institution_ids":["https://openalex.org/I101479585"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5034678630","display_name":"Yihang Zhang","orcid":"https://orcid.org/0009-0008-9446-5111"},"institutions":[{"id":"https://openalex.org/I101479585","display_name":"South China Agricultural University","ror":"https://ror.org/05v9jqt67","country_code":"CN","type":"education","lineage":["https://openalex.org/I101479585"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yihang Zhang","raw_affiliation_strings":["South China Agricultural University,College of Mathematics and Informatics,Guangzhou,China,510642"],"affiliations":[{"raw_affiliation_string":"South China Agricultural University,College of Mathematics and Informatics,Guangzhou,China,510642","institution_ids":["https://openalex.org/I101479585"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5124107690","display_name":"Wing W. Y. Ng","orcid":null},"institutions":[{"id":"https://openalex.org/I90610280","display_name":"South China University of Technology","ror":"https://ror.org/0530pts50","country_code":"CN","type":"education","lineage":["https://openalex.org/I90610280"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Wing W. Y. Ng","raw_affiliation_strings":["South China University of Technology,Guangdong Provincial Key Laboratory of Computational Intelligence and Cyberspace Information, School of Computer Science and Engineering,Guangzhou,China,510006"],"affiliations":[{"raw_affiliation_string":"South China University of Technology,Guangdong Provincial Key Laboratory of Computational Intelligence and Cyberspace Information, School of Computer Science and Engineering,Guangzhou,China,510006","institution_ids":["https://openalex.org/I90610280"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5025153228"],"corresponding_institution_ids":["https://openalex.org/I101479585"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.69138719,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"3522","last_page":"3527"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9782000184059143,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9782000184059143,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.009800000116229057,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.0017000000225380063,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/discriminative-model","display_name":"Discriminative model","score":0.8403000235557556},{"id":"https://openalex.org/keywords/cluster-analysis","display_name":"Cluster analysis","score":0.6391000151634216},{"id":"https://openalex.org/keywords/event","display_name":"Event (particle physics)","score":0.6243000030517578},{"id":"https://openalex.org/keywords/feature","display_name":"Feature (linguistics)","score":0.5680000185966492},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.5357000231742859},{"id":"https://openalex.org/keywords/feature-learning","display_name":"Feature learning","score":0.5153999924659729},{"id":"https://openalex.org/keywords/representation","display_name":"Representation (politics)","score":0.5002999901771545},{"id":"https://openalex.org/keywords/convolutional-neural-network","display_name":"Convolutional neural network","score":0.4975000023841858}],"concepts":[{"id":"https://openalex.org/C97931131","wikidata":"https://www.wikidata.org/wiki/Q5282087","display_name":"Discriminative model","level":2,"score":0.8403000235557556},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6735000014305115},{"id":"https://openalex.org/C73555534","wikidata":"https://www.wikidata.org/wiki/Q622825","display_name":"Cluster analysis","level":2,"score":0.6391000151634216},{"id":"https://openalex.org/C2779662365","wikidata":"https://www.wikidata.org/wiki/Q5416694","display_name":"Event (particle physics)","level":2,"score":0.6243000030517578},{"id":"https://openalex.org/C2776401178","wikidata":"https://www.wikidata.org/wiki/Q12050496","display_name":"Feature (linguistics)","level":2,"score":0.5680000185966492},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.5357000231742859},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.5231999754905701},{"id":"https://openalex.org/C59404180","wikidata":"https://www.wikidata.org/wiki/Q17013334","display_name":"Feature learning","level":2,"score":0.5153999924659729},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5101000070571899},{"id":"https://openalex.org/C2776359362","wikidata":"https://www.wikidata.org/wiki/Q2145286","display_name":"Representation (politics)","level":3,"score":0.5002999901771545},{"id":"https://openalex.org/C81363708","wikidata":"https://www.wikidata.org/wiki/Q17084460","display_name":"Convolutional neural network","level":2,"score":0.4975000023841858},{"id":"https://openalex.org/C126042441","wikidata":"https://www.wikidata.org/wiki/Q1324888","display_name":"Frame (networking)","level":2,"score":0.4341000020503998},{"id":"https://openalex.org/C48372109","wikidata":"https://www.wikidata.org/wiki/Q3913","display_name":"Binary number","level":2,"score":0.4075999855995178},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.3553999960422516},{"id":"https://openalex.org/C52622490","wikidata":"https://www.wikidata.org/wiki/Q1026626","display_name":"Feature extraction","level":2,"score":0.3456999957561493},{"id":"https://openalex.org/C203718221","wikidata":"https://www.wikidata.org/wiki/Q491713","display_name":"Sound (geography)","level":2,"score":0.3199999928474426},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.30820000171661377},{"id":"https://openalex.org/C23224414","wikidata":"https://www.wikidata.org/wiki/Q176769","display_name":"Hidden Markov model","level":2,"score":0.3066999912261963},{"id":"https://openalex.org/C2984030306","wikidata":"https://www.wikidata.org/wiki/Q4819857","display_name":"Sound analysis","level":2,"score":0.29260000586509705},{"id":"https://openalex.org/C112758219","wikidata":"https://www.wikidata.org/wiki/Q16038819","display_name":"Duration (music)","level":2,"score":0.2736000120639801},{"id":"https://openalex.org/C108583219","wikidata":"https://www.wikidata.org/wiki/Q197536","display_name":"Deep learning","level":2,"score":0.2687000036239624}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/smc58881.2025.11343608","is_oa":false,"landing_page_url":"https://doi.org/10.1109/smc58881.2025.11343608","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 IEEE International Conference on Systems, Man, and Cybernetics (SMC)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"display_name":"Reduced inequalities","id":"https://metadata.un.org/sdg/10","score":0.7737172245979309}],"awards":[],"funders":[{"id":"https://openalex.org/F4320321921","display_name":"Natural Science Foundation of Guangdong Province","ror":null},{"id":"https://openalex.org/F4320335777","display_name":"National Key Research and Development Program of China","ror":null}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":15,"referenced_works":["https://openalex.org/W1987971958","https://openalex.org/W2591013610","https://openalex.org/W2759976799","https://openalex.org/W2973152780","https://openalex.org/W2981733351","https://openalex.org/W3016059657","https://openalex.org/W3161792602","https://openalex.org/W4221149441","https://openalex.org/W4283582457","https://openalex.org/W4315926819","https://openalex.org/W4372260310","https://openalex.org/W4375869216","https://openalex.org/W4392904244","https://openalex.org/W4392904307","https://openalex.org/W4408353947"],"related_works":[],"abstract_inverted_index":{"Sound":[0,68],"Event":[1,69],"Detection":[2,70],"(SED)":[3],"aims":[4],"to":[5,40,132],"identify":[6],"and":[7,44,54,65,81,127,141],"locate":[8],"specific":[9,104],"sound":[10],"events":[11],"within":[12],"audio":[13],"streams.":[14],"Despite":[15],"recent":[16],"advancements,":[17],"current":[18],"methodologies":[19],"exhibit":[20],"two":[21],"fundamental":[22],"limitations:":[23],"(1)":[24],"Insufficient":[25],"modeling":[26],"of":[27,74,137],"discriminative":[28,100],"temporal-spectral":[29,105],"characteristics":[30],"induces":[31,48],"compromised":[32,110],"differentiation":[33],"for":[34],"acoustically":[35],"similar":[36],"events.":[37],"(2)":[38],"Failing":[39],"address":[41,134],"both":[42],"inter-class":[43,138],"intra-class":[45,142],"imbalance":[46,140],"problems":[47],"biased":[49],"classifications":[50],"toward":[51],"dominant":[52],"classes":[53],"inactive":[55],"frames.":[56],"To":[57],"tackle":[58],"these":[59],"challenges,":[60],"we":[61],"propose":[62],"Acoustic":[63],"Clustering":[64],"Imbalance":[66],"Learning-based":[67],"(ACIL-SED),":[71],"which":[72],"consists":[73],"Cluster-Specialized":[75],"Convolutional":[76],"Recurrent":[77],"Neural":[78],"Network":[79],"(CS-CRNN)":[80],"Dual-Objective":[82],"Adaptive":[83],"Balance":[84],"Loss":[85],"(DOABLoss).":[86],"The":[87,119],"CS-CRNN":[88],"employs":[89],"acoustic":[90,156],"clustering-guided":[91],"specialized":[92],"sub-models,":[93],"where":[94],"each":[95],"cluster\u2019s":[96],"sub-model":[97],"focuses":[98],"on":[99],"feature":[101,111],"learning":[102],"in":[103,114,154],"characteristics,":[106],"thereby":[107],"resolving":[108],"the":[109,135],"representation":[112],"inherent":[113],"a":[115],"shared":[116],"single-model":[117],"architecture.":[118],"DOABLoss":[120],"adaptively":[121],"combines":[122],"mean":[123],"squared":[124],"error":[125],"(MSE)":[126],"weighted":[128],"binary":[129],"cross-entropy":[130],"(WBCE)":[131],"simultaneously":[133],"issues":[136],"duration":[139],"active-inactive":[143],"frame":[144],"imbalance.":[145],"Experimental":[146],"results":[147],"show":[148],"that":[149],"ACIL-SED":[150],"outperforms":[151],"state-of-the-art":[152],"methods":[153],"complex":[155],"environments.":[157]},"counts_by_year":[],"updated_date":"2026-04-09T08:11:56.329763","created_date":"2026-01-29T00:00:00"}
