{"id":"https://openalex.org/W4402980879","doi":"https://doi.org/10.1109/icme57554.2024.10687538","title":"Noise Adaptive Fine-grained Speech Intelligibility Enhancement With Soft-label Guided Diffusion","display_name":"Noise Adaptive Fine-grained Speech Intelligibility Enhancement With Soft-label Guided Diffusion","publication_year":2024,"publication_date":"2024-07-15","ids":{"openalex":"https://openalex.org/W4402980879","doi":"https://doi.org/10.1109/icme57554.2024.10687538"},"language":"en","primary_location":{"id":"doi:10.1109/icme57554.2024.10687538","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icme57554.2024.10687538","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2024 IEEE International Conference on Multimedia and Expo (ICME)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5102921009","display_name":"Chenyi Zhu","orcid":"https://orcid.org/0009-0004-9875-152X"},"institutions":[{"id":"https://openalex.org/I31590910","display_name":"Jianghan University","ror":"https://ror.org/041c9x778","country_code":"CN","type":"education","lineage":["https://openalex.org/I31590910"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Chenyi Zhu","raw_affiliation_strings":["Jianghan University,School of Artificial Intelligence,Wuhan,China"],"affiliations":[{"raw_affiliation_string":"Jianghan University,School of Artificial Intelligence,Wuhan,China","institution_ids":["https://openalex.org/I31590910"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5055698842","display_name":"Dengshi Li","orcid":"https://orcid.org/0000-0002-3349-8664"},"institutions":[{"id":"https://openalex.org/I31590910","display_name":"Jianghan University","ror":"https://ror.org/041c9x778","country_code":"CN","type":"education","lineage":["https://openalex.org/I31590910"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Dengshi Li","raw_affiliation_strings":["Jianghan University,School of Artificial Intelligence,Wuhan,China"],"affiliations":[{"raw_affiliation_string":"Jianghan University,School of Artificial Intelligence,Wuhan,China","institution_ids":["https://openalex.org/I31590910"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5081196879","display_name":"Aolei Chen","orcid":null},"institutions":[{"id":"https://openalex.org/I31590910","display_name":"Jianghan University","ror":"https://ror.org/041c9x778","country_code":"CN","type":"education","lineage":["https://openalex.org/I31590910"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Aolei Chen","raw_affiliation_strings":["Jianghan University,School of Artificial Intelligence,Wuhan,China"],"affiliations":[{"raw_affiliation_string":"Jianghan University,School of Artificial Intelligence,Wuhan,China","institution_ids":["https://openalex.org/I31590910"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5020803502","display_name":"Yu Gao","orcid":"https://orcid.org/0000-0002-2135-7872"},"institutions":[{"id":"https://openalex.org/I31590910","display_name":"Jianghan University","ror":"https://ror.org/041c9x778","country_code":"CN","type":"education","lineage":["https://openalex.org/I31590910"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yu Gao","raw_affiliation_strings":["Jianghan University,School of Artificial Intelligence,Wuhan,China"],"affiliations":[{"raw_affiliation_string":"Jianghan University,School of Artificial Intelligence,Wuhan,China","institution_ids":["https://openalex.org/I31590910"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5115591090","display_name":"Wei Li","orcid":"https://orcid.org/0000-0002-9791-9763"},"institutions":[{"id":"https://openalex.org/I31590910","display_name":"Jianghan University","ror":"https://ror.org/041c9x778","country_code":"CN","type":"education","lineage":["https://openalex.org/I31590910"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Wei Li","raw_affiliation_strings":["Jianghan University,School of Artificial Intelligence,Wuhan,China"],"affiliations":[{"raw_affiliation_string":"Jianghan University,School of Artificial Intelligence,Wuhan,China","institution_ids":["https://openalex.org/I31590910"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5115614285","display_name":"Xi Wang","orcid":"https://orcid.org/0009-0009-5146-8408"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Xi Wang","raw_affiliation_strings":["Hubei Huazhong Electric Power Technology Development,Power Grid Business Department,Wuhan,China"],"affiliations":[{"raw_affiliation_string":"Hubei Huazhong Electric Power Technology Development,Power Grid Business Department,Wuhan,China","institution_ids":[]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5102921009"],"corresponding_institution_ids":["https://openalex.org/I31590910"],"apc_list":null,"apc_paid":null,"fwci":0.7274,"has_fulltext":false,"cited_by_count":2,"citation_normalized_percentile":{"value":0.76460892,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":95,"max":96},"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"6"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9890000224113464,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9890000224113464,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9839000105857849,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/intelligibility","display_name":"Intelligibility (philosophy)","score":0.7584162950515747},{"id":"https://openalex.org/keywords/speech-enhancement","display_name":"Speech enhancement","score":0.738947868347168},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6088343858718872},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.5809724926948547},{"id":"https://openalex.org/keywords/noise","display_name":"Noise (video)","score":0.41990533471107483},{"id":"https://openalex.org/keywords/acoustics","display_name":"Acoustics","score":0.38227903842926025},{"id":"https://openalex.org/keywords/noise-reduction","display_name":"Noise reduction","score":0.2999439835548401},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.18860694766044617},{"id":"https://openalex.org/keywords/physics","display_name":"Physics","score":0.08189821243286133}],"concepts":[{"id":"https://openalex.org/C60048801","wikidata":"https://www.wikidata.org/wiki/Q1433889","display_name":"Intelligibility (philosophy)","level":2,"score":0.7584162950515747},{"id":"https://openalex.org/C2776182073","wikidata":"https://www.wikidata.org/wiki/Q7575395","display_name":"Speech enhancement","level":3,"score":0.738947868347168},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6088343858718872},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.5809724926948547},{"id":"https://openalex.org/C99498987","wikidata":"https://www.wikidata.org/wiki/Q2210247","display_name":"Noise (video)","level":3,"score":0.41990533471107483},{"id":"https://openalex.org/C24890656","wikidata":"https://www.wikidata.org/wiki/Q82811","display_name":"Acoustics","level":1,"score":0.38227903842926025},{"id":"https://openalex.org/C163294075","wikidata":"https://www.wikidata.org/wiki/Q581861","display_name":"Noise reduction","level":2,"score":0.2999439835548401},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.18860694766044617},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.08189821243286133},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.0},{"id":"https://openalex.org/C111472728","wikidata":"https://www.wikidata.org/wiki/Q9471","display_name":"Epistemology","level":1,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/icme57554.2024.10687538","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icme57554.2024.10687538","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2024 IEEE International Conference on Multimedia and Expo (ICME)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/10","score":0.5400000214576721,"display_name":"Reduced inequalities"}],"awards":[],"funders":[{"id":"https://openalex.org/F4320326959","display_name":"Jianghan University","ror":"https://ror.org/041c9x778"},{"id":"https://openalex.org/F4320330944","display_name":"Nature","ror":null}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":26,"referenced_works":["https://openalex.org/W1552314771","https://openalex.org/W1998648683","https://openalex.org/W2046376809","https://openalex.org/W2048653958","https://openalex.org/W2092644348","https://openalex.org/W2119500623","https://openalex.org/W2125114513","https://openalex.org/W2404548911","https://openalex.org/W2516001803","https://openalex.org/W2749552851","https://openalex.org/W2771877711","https://openalex.org/W2810311710","https://openalex.org/W2936173958","https://openalex.org/W3016077835","https://openalex.org/W3034443407","https://openalex.org/W3035123403","https://openalex.org/W4221144097","https://openalex.org/W4372266968","https://openalex.org/W4375869364","https://openalex.org/W4379470385","https://openalex.org/W4380303621","https://openalex.org/W4386057725","https://openalex.org/W6786375611","https://openalex.org/W6795261426","https://openalex.org/W6795288823","https://openalex.org/W6802527329"],"related_works":["https://openalex.org/W1986772939","https://openalex.org/W2037635165","https://openalex.org/W2738829087","https://openalex.org/W2127461790","https://openalex.org/W2069324367","https://openalex.org/W2542062716","https://openalex.org/W3096184950","https://openalex.org/W1505346162","https://openalex.org/W4200562864","https://openalex.org/W4231424160"],"abstract_inverted_index":{"Background":[0],"noise":[1,123],"in":[2,59],"the":[3,8,96,106],"listening":[4],"stage":[5],"often":[6],"affects":[7],"speech":[9,30,46,49,66,80,117],"intelligibility":[10,42,57,81,118],"and":[11,68,102],"quality":[12,69],"of":[13,108],"communication":[14],"devices,":[15],"such":[16],"as":[17],"mobile":[18],"phones.":[19],"Traditional":[20],"approaches":[21],"like":[22],"Near-end":[23],"Listening":[24],"Enhancement":[25],"(NELE)":[26],"aimed":[27],"at":[28],"processing":[29],"signals":[31],"to":[32,40,47,63],"enhance":[33,41],"intelligibility.":[34],"Recent":[35],"studies":[36],"have":[37],"been":[38],"conducted":[39],"by":[43,72],"converting":[44],"normal":[45],"Lombard":[48,86],"with":[50],"varying":[51],"level.":[52],"However,":[53],"overzealous":[54],"focus":[55],"on":[56,89],"improvement":[58],"previous":[60],"research":[61],"led":[62],"over-processing,":[64],"causing":[65],"distortion":[67],"degradation.":[70],"Motivated":[71],"soft-label":[73],"guidance,":[74],"we":[75],"propose":[76],"a":[77,113],"noise-adaptive":[78],"fine-grained":[79,116],"enhancement":[82,119],"framework\u2014NELE-Diff.":[83],"It":[84],"fine-tunes":[85],"intensity":[87],"based":[88],"noise,":[90],"incorporating":[91],"multi-metric":[92],"reinforcement":[93],"learning":[94],"into":[95],"diffusion":[97],"model":[98],"reverse":[99],"process.":[100],"Subjective":[101],"objective":[103],"experiments":[104],"reveal":[105],"superiority":[107],"NELE-Diff":[109],"over":[110],"baselines,":[111],"presenting":[112],"more":[114],"adaptive":[115],"framework":[120],"for":[121],"different":[122],"levels.":[124]},"counts_by_year":[{"year":2025,"cited_by_count":2}],"updated_date":"2025-12-22T23:10:17.713674","created_date":"2025-10-10T00:00:00"}
