{"id":"https://openalex.org/W4410779772","doi":"https://doi.org/10.1109/icasspw65056.2025.11011161","title":"Leveraging LLM and Text-Queried Separation for Noise-Robust Sound Event Detection","display_name":"Leveraging LLM and Text-Queried Separation for Noise-Robust Sound Event Detection","publication_year":2025,"publication_date":"2025-04-06","ids":{"openalex":"https://openalex.org/W4410779772","doi":"https://doi.org/10.1109/icasspw65056.2025.11011161"},"language":"en","primary_location":{"id":"doi:10.1109/icasspw65056.2025.11011161","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icasspw65056.2025.11011161","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 IEEE International Conference on Acoustics, Speech, and Signal Processing Workshops (ICASSPW)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5113113933","display_name":"Han Yin","orcid":null},"institutions":[{"id":"https://openalex.org/I17145004","display_name":"Northwestern Polytechnical University","ror":"https://ror.org/01y0j0j86","country_code":"CN","type":"education","lineage":["https://openalex.org/I17145004"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Han Yin","raw_affiliation_strings":["Northwestern Polytechnical University,School of Marine Science and Technology,Xi&#x2019;an,China"],"affiliations":[{"raw_affiliation_string":"Northwestern Polytechnical University,School of Marine Science and Technology,Xi&#x2019;an,China","institution_ids":["https://openalex.org/I17145004"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5082028932","display_name":"Yang Xiao","orcid":"https://orcid.org/0000-0002-3982-9540"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yang Xiao","raw_affiliation_strings":["Fortemedia Singapore,Singapore"],"affiliations":[{"raw_affiliation_string":"Fortemedia Singapore,Singapore","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5027263923","display_name":"Jisheng Bai","orcid":"https://orcid.org/0000-0002-9803-8212"},"institutions":[{"id":"https://openalex.org/I17145004","display_name":"Northwestern Polytechnical University","ror":"https://ror.org/01y0j0j86","country_code":"CN","type":"education","lineage":["https://openalex.org/I17145004"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jisheng Bai","raw_affiliation_strings":["Northwestern Polytechnical University,School of Marine Science and Technology,Xi&#x2019;an,China"],"affiliations":[{"raw_affiliation_string":"Northwestern Polytechnical University,School of Marine Science and Technology,Xi&#x2019;an,China","institution_ids":["https://openalex.org/I17145004"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5029637694","display_name":"Rohan Kumar Das","orcid":"https://orcid.org/0000-0002-1332-3357"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Rohan Kumar Das","raw_affiliation_strings":["Fortemedia Singapore,Singapore"],"affiliations":[{"raw_affiliation_string":"Fortemedia Singapore,Singapore","institution_ids":[]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5113113933"],"corresponding_institution_ids":["https://openalex.org/I17145004"],"apc_list":null,"apc_paid":null,"fwci":3.0542,"has_fulltext":false,"cited_by_count":2,"citation_normalized_percentile":{"value":0.90890709,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":91,"max":99},"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"5"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9825000166893005,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11349","display_name":"Music Technology and Sound Studies","score":0.9768999814987183,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7166262865066528},{"id":"https://openalex.org/keywords/noise","display_name":"Noise (video)","score":0.5806655883789062},{"id":"https://openalex.org/keywords/event","display_name":"Event (particle physics)","score":0.5511547327041626},{"id":"https://openalex.org/keywords/source-separation","display_name":"Source separation","score":0.5458250045776367},{"id":"https://openalex.org/keywords/separation","display_name":"Separation (statistics)","score":0.5386656522750854},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.45329612493515015},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.30521076917648315},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.12789249420166016},{"id":"https://openalex.org/keywords/physics","display_name":"Physics","score":0.0672181248664856}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7166262865066528},{"id":"https://openalex.org/C99498987","wikidata":"https://www.wikidata.org/wiki/Q2210247","display_name":"Noise (video)","level":3,"score":0.5806655883789062},{"id":"https://openalex.org/C2779662365","wikidata":"https://www.wikidata.org/wiki/Q5416694","display_name":"Event (particle physics)","level":2,"score":0.5511547327041626},{"id":"https://openalex.org/C2776864781","wikidata":"https://www.wikidata.org/wiki/Q52617913","display_name":"Source separation","level":2,"score":0.5458250045776367},{"id":"https://openalex.org/C2776061190","wikidata":"https://www.wikidata.org/wiki/Q7451805","display_name":"Separation (statistics)","level":2,"score":0.5386656522750854},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.45329612493515015},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.30521076917648315},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.12789249420166016},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0672181248664856},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.0},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/icasspw65056.2025.11011161","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icasspw65056.2025.11011161","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 IEEE International Conference on Acoustics, Speech, and Signal Processing Workshops (ICASSPW)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":32,"referenced_works":["https://openalex.org/W1924762813","https://openalex.org/W1970578576","https://openalex.org/W2296073425","https://openalex.org/W2296193698","https://openalex.org/W2591013610","https://openalex.org/W2593116425","https://openalex.org/W2759176740","https://openalex.org/W2889604124","https://openalex.org/W2963390466","https://openalex.org/W3006275583","https://openalex.org/W3008335638","https://openalex.org/W3015190346","https://openalex.org/W3015199127","https://openalex.org/W3142849873","https://openalex.org/W3166409449","https://openalex.org/W3178592608","https://openalex.org/W4224871700","https://openalex.org/W4224920041","https://openalex.org/W4313061427","https://openalex.org/W4372260310","https://openalex.org/W4385823066","https://openalex.org/W4391790571","https://openalex.org/W4408353947","https://openalex.org/W4408354509","https://openalex.org/W6600220327","https://openalex.org/W6778883912","https://openalex.org/W6848208918","https://openalex.org/W6850625674","https://openalex.org/W6869853846","https://openalex.org/W6870090694","https://openalex.org/W6870174606","https://openalex.org/W6870977964"],"related_works":["https://openalex.org/W2071676784","https://openalex.org/W4292513318","https://openalex.org/W4308092240","https://openalex.org/W2287611352","https://openalex.org/W320684304","https://openalex.org/W1509352139","https://openalex.org/W4385464961","https://openalex.org/W2060903012","https://openalex.org/W2021161555","https://openalex.org/W2077498359"],"abstract_inverted_index":{"Sound":[0],"Event":[1],"Detection":[2],"(SED)":[3],"is":[4,41,93],"challenging":[5],"in":[6,44,117,128,140],"noisy":[7,29,45,118],"environments":[8],"where":[9],"overlapping":[10,138],"sounds":[11],"obscure":[12],"target":[13,24,39],"events.":[14],"Language-queried":[15],"audio":[16],"source":[17],"separation":[18],"(LASS)":[19],"aims":[20],"to":[21,49,65,74,95],"isolate":[22],"the":[23,37,58,104,111],"sound":[25,40],"events":[26,139],"from":[27],"a":[28,83,133],"clip.":[30],"However,":[31],"this":[32,54],"approach":[33],"can":[34],"fail":[35],"when":[36],"exact":[38],"unknown,":[42],"particularly":[43],"test":[46],"sets,":[47],"leading":[48],"reduced":[50],"performance.":[51],"To":[52],"address":[53],"issue,":[55],"we":[56,81],"leverage":[57],"capabilities":[59],"of":[60,126],"large":[61],"language":[62],"models":[63,145],"(LLMs)":[64],"analyze":[66],"and":[67,76,131,143],"summarize":[68],"acoustic":[69],"data.":[70],"By":[71],"using":[72],"LLMs":[73,127],"identify":[75],"select":[77],"specific":[78],"noise":[79,84],"types,":[80],"implement":[82],"augmentation":[85],"method":[86,113],"for":[87,103,136],"noise-robust":[88,129],"fine-tuning.":[89],"The":[90],"fine-tuned":[91],"model":[92],"applied":[94],"predict":[96],"clip-wise":[97],"event":[98],"predictions":[99],"as":[100],"text":[101],"queries":[102],"LASS":[105],"model.":[106],"Our":[107],"studies":[108],"demonstrate":[109],"that":[110],"proposed":[112],"improves":[114],"SED":[115,130],"performance":[116],"environments.":[119],"This":[120],"work":[121],"represents":[122],"an":[123],"early":[124],"application":[125],"suggests":[132],"promising":[134],"direction":[135],"handling":[137],"SED.":[141],"Codes":[142],"pretrained":[144],"are":[146],"available":[147],"at":[148],"https://github.com/apple-yinhan/Noise-robust-SED.":[149]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":1}],"updated_date":"2026-03-25T14:56:36.534964","created_date":"2025-10-10T00:00:00"}
