{"id":"https://openalex.org/W7148590559","doi":"https://doi.org/10.1109/asru65441.2025.11434718","title":"Improving Speech Enhancement with Multi-Metric Supervision from Learned Quality Assessment","display_name":"Improving Speech Enhancement with Multi-Metric Supervision from Learned Quality Assessment","publication_year":2025,"publication_date":"2025-12-06","ids":{"openalex":"https://openalex.org/W7148590559","doi":"https://doi.org/10.1109/asru65441.2025.11434718"},"language":null,"primary_location":{"id":"doi:10.1109/asru65441.2025.11434718","is_oa":false,"landing_page_url":"https://doi.org/10.1109/asru65441.2025.11434718","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 IEEE Automatic Speech Recognition and Understanding Workshop (ASRU)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5100391700","display_name":"Wei Wang","orcid":"https://orcid.org/0000-0001-6853-7785"},"institutions":[{"id":"https://openalex.org/I183067930","display_name":"Shanghai Jiao Tong University","ror":"https://ror.org/0220qvk04","country_code":"CN","type":"education","lineage":["https://openalex.org/I183067930"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Wei Wang","raw_affiliation_strings":["Shanghai Jiao Tong University,China"],"affiliations":[{"raw_affiliation_string":"Shanghai Jiao Tong University,China","institution_ids":["https://openalex.org/I183067930"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5071937621","display_name":"Wangyou Zhang","orcid":"https://orcid.org/0000-0003-4500-3515"},"institutions":[{"id":"https://openalex.org/I183067930","display_name":"Shanghai Jiao Tong University","ror":"https://ror.org/0220qvk04","country_code":"CN","type":"education","lineage":["https://openalex.org/I183067930"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Wangyou Zhang","raw_affiliation_strings":["Shanghai Jiao Tong University,China"],"affiliations":[{"raw_affiliation_string":"Shanghai Jiao Tong University,China","institution_ids":["https://openalex.org/I183067930"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5132813191","display_name":"Chenda Li","orcid":null},"institutions":[{"id":"https://openalex.org/I183067930","display_name":"Shanghai Jiao Tong University","ror":"https://ror.org/0220qvk04","country_code":"CN","type":"education","lineage":["https://openalex.org/I183067930"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Chenda Li","raw_affiliation_strings":["Shanghai Jiao Tong University,China"],"affiliations":[{"raw_affiliation_string":"Shanghai Jiao Tong University,China","institution_ids":["https://openalex.org/I183067930"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5132789680","display_name":"Jaitong Shi","orcid":null},"institutions":[{"id":"https://openalex.org/I74973139","display_name":"Carnegie Mellon University","ror":"https://ror.org/05x2bcf33","country_code":"US","type":"education","lineage":["https://openalex.org/I74973139"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Jaitong Shi","raw_affiliation_strings":["Carnegie Mellon University,USA"],"affiliations":[{"raw_affiliation_string":"Carnegie Mellon University,USA","institution_ids":["https://openalex.org/I74973139"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5132789061","display_name":"Shinji Watanabe","orcid":null},"institutions":[{"id":"https://openalex.org/I74973139","display_name":"Carnegie Mellon University","ror":"https://ror.org/05x2bcf33","country_code":"US","type":"education","lineage":["https://openalex.org/I74973139"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Shinji Watanabe","raw_affiliation_strings":["Carnegie Mellon University,USA"],"affiliations":[{"raw_affiliation_string":"Carnegie Mellon University,USA","institution_ids":["https://openalex.org/I74973139"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5129685555","display_name":"Yanmin Qian","orcid":null},"institutions":[{"id":"https://openalex.org/I183067930","display_name":"Shanghai Jiao Tong University","ror":"https://ror.org/0220qvk04","country_code":"CN","type":"education","lineage":["https://openalex.org/I183067930"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yanmin Qian","raw_affiliation_strings":["Shanghai Jiao Tong University,China"],"affiliations":[{"raw_affiliation_string":"Shanghai Jiao Tong University,China","institution_ids":["https://openalex.org/I183067930"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5100391700"],"corresponding_institution_ids":["https://openalex.org/I183067930"],"apc_list":null,"apc_paid":null,"fwci":2.5565,"has_fulltext":false,"cited_by_count":2,"citation_normalized_percentile":{"value":0.91231572,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":98,"max":99},"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"8"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.982699990272522,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.982699990272522,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11165","display_name":"Image and Video Quality Assessment","score":0.010900000110268593,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10283","display_name":"Hearing Loss and Rehabilitation","score":0.0013000000035390258,"subfield":{"id":"https://openalex.org/subfields/2805","display_name":"Cognitive Neuroscience"},"field":{"id":"https://openalex.org/fields/28","display_name":"Neuroscience"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/quality","display_name":"Quality (philosophy)","score":0.6330000162124634},{"id":"https://openalex.org/keywords/quality-assessment","display_name":"Quality assessment","score":0.5436999797821045},{"id":"https://openalex.org/keywords/training","display_name":"Training (meteorology)","score":0.44670000672340393},{"id":"https://openalex.org/keywords/identification","display_name":"Identification (biology)","score":0.4226999878883362},{"id":"https://openalex.org/keywords/signal","display_name":"SIGNAL (programming language)","score":0.41999998688697815},{"id":"https://openalex.org/keywords/range","display_name":"Range (aeronautics)","score":0.41929998993873596},{"id":"https://openalex.org/keywords/perception","display_name":"Perception","score":0.4154999852180481},{"id":"https://openalex.org/keywords/key","display_name":"Key (lock)","score":0.4117000102996826}],"concepts":[{"id":"https://openalex.org/C2779530757","wikidata":"https://www.wikidata.org/wiki/Q1207505","display_name":"Quality (philosophy)","level":2,"score":0.6330000162124634},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6028000116348267},{"id":"https://openalex.org/C3020001037","wikidata":"https://www.wikidata.org/wiki/Q836575","display_name":"Quality assessment","level":3,"score":0.5436999797821045},{"id":"https://openalex.org/C2777211547","wikidata":"https://www.wikidata.org/wiki/Q17141490","display_name":"Training (meteorology)","level":2,"score":0.44670000672340393},{"id":"https://openalex.org/C116834253","wikidata":"https://www.wikidata.org/wiki/Q2039217","display_name":"Identification (biology)","level":2,"score":0.4226999878883362},{"id":"https://openalex.org/C2779843651","wikidata":"https://www.wikidata.org/wiki/Q7390335","display_name":"SIGNAL (programming language)","level":2,"score":0.41999998688697815},{"id":"https://openalex.org/C204323151","wikidata":"https://www.wikidata.org/wiki/Q905424","display_name":"Range (aeronautics)","level":2,"score":0.41929998993873596},{"id":"https://openalex.org/C26760741","wikidata":"https://www.wikidata.org/wiki/Q160402","display_name":"Perception","level":2,"score":0.4154999852180481},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.41499999165534973},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.4117000102996826},{"id":"https://openalex.org/C51632099","wikidata":"https://www.wikidata.org/wiki/Q3985153","display_name":"Training set","level":2,"score":0.374099999666214},{"id":"https://openalex.org/C2776182073","wikidata":"https://www.wikidata.org/wiki/Q7575395","display_name":"Speech enhancement","level":3,"score":0.36480000615119934},{"id":"https://openalex.org/C2777267654","wikidata":"https://www.wikidata.org/wiki/Q3519023","display_name":"Test (biology)","level":2,"score":0.33320000767707825},{"id":"https://openalex.org/C2776760102","wikidata":"https://www.wikidata.org/wiki/Q5139990","display_name":"Code (set theory)","level":3,"score":0.3327000141143799},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.32600000500679016},{"id":"https://openalex.org/C204201278","wikidata":"https://www.wikidata.org/wiki/Q1332614","display_name":"Voice activity detection","level":3,"score":0.31630000472068787},{"id":"https://openalex.org/C99209842","wikidata":"https://www.wikidata.org/wiki/Q643696","display_name":"Speech perception","level":3,"score":0.29159998893737793},{"id":"https://openalex.org/C106436119","wikidata":"https://www.wikidata.org/wiki/Q836575","display_name":"Quality assurance","level":3,"score":0.289000004529953},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.2840999960899353},{"id":"https://openalex.org/C167310288","wikidata":"https://www.wikidata.org/wiki/Q7564808","display_name":"Sound quality","level":2,"score":0.2754000127315521},{"id":"https://openalex.org/C61328038","wikidata":"https://www.wikidata.org/wiki/Q3358061","display_name":"Speech processing","level":2,"score":0.274399995803833},{"id":"https://openalex.org/C12174686","wikidata":"https://www.wikidata.org/wiki/Q1058438","display_name":"Risk assessment","level":2,"score":0.2639000117778778},{"id":"https://openalex.org/C24756922","wikidata":"https://www.wikidata.org/wiki/Q1757694","display_name":"Data quality","level":3,"score":0.25699999928474426},{"id":"https://openalex.org/C13895895","wikidata":"https://www.wikidata.org/wiki/Q3270773","display_name":"Speech coding","level":2,"score":0.25540000200271606}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/asru65441.2025.11434718","is_oa":false,"landing_page_url":"https://doi.org/10.1109/asru65441.2025.11434718","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 IEEE Automatic Speech Recognition and Understanding Workshop (ASRU)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"score":0.4791506826877594,"id":"https://metadata.un.org/sdg/4","display_name":"Quality Education"}],"awards":[],"funders":[{"id":"https://openalex.org/F4320338440","display_name":"HORIZON EUROPE Health","ror":null}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":46,"referenced_works":["https://openalex.org/W1552314771","https://openalex.org/W1989337816","https://openalex.org/W2107860279","https://openalex.org/W2127851351","https://openalex.org/W2516001803","https://openalex.org/W2559260703","https://openalex.org/W2962780374","https://openalex.org/W2964058413","https://openalex.org/W2972436155","https://openalex.org/W3097906045","https://openalex.org/W3145029257","https://openalex.org/W3196475561","https://openalex.org/W3197580070","https://openalex.org/W3202278141","https://openalex.org/W3209059054","https://openalex.org/W3209490467","https://openalex.org/W3209984917","https://openalex.org/W4225302959","https://openalex.org/W4225956675","https://openalex.org/W4253928870","https://openalex.org/W4293846201","https://openalex.org/W4296068974","https://openalex.org/W4297841603","https://openalex.org/W4375928773","https://openalex.org/W4383961998","https://openalex.org/W4391021560","https://openalex.org/W4391021774","https://openalex.org/W4393859250","https://openalex.org/W4395959004","https://openalex.org/W4399849612","https://openalex.org/W4402111636","https://openalex.org/W4402111799","https://openalex.org/W4402112079","https://openalex.org/W4402112192","https://openalex.org/W4406137543","https://openalex.org/W4406461503","https://openalex.org/W4406461865","https://openalex.org/W4408352093","https://openalex.org/W4408353835","https://openalex.org/W4415432930","https://openalex.org/W4415433004","https://openalex.org/W4415433170","https://openalex.org/W4415433286","https://openalex.org/W4415433380","https://openalex.org/W4415795993","https://openalex.org/W7148612566"],"related_works":[],"abstract_inverted_index":{"Speech":[0],"quality":[1,9,32,106,144],"assessment":[2],"(SQA)":[3],"aims":[4],"to":[5,23,29,44,50,70,102],"predict":[6,71],"the":[7],"perceived":[8],"of":[10,17,92,143],"speech":[11,24,31],"signals":[12],"under":[13],"a":[14,61,66,76,81,89,141],"wide":[15],"range":[16,142],"distortions.":[18],"It":[19],"is":[20],"inherently":[21],"connected":[22],"enhancement":[25],"(SE),":[26],"which":[27,99],"seeks":[28],"improve":[30],"by":[33],"removing":[34],"unwanted":[35],"signal":[36,83],"components.":[37],"While":[38],"SQA":[39,67],"models":[40],"are":[41,123,149],"widely":[42],"used":[43],"evaluate":[45],"SE":[46,52,78,94],"performance,":[47],"their":[48],"potential":[49],"guide":[51],"training":[53,62,116,136],"remains":[54],"underexplored.":[55],"In":[56],"this":[57],"work,":[58],"we":[59],"investigate":[60],"framework":[63],"that":[64,134],"leverages":[65],"model,":[68],"trained":[69],"multiple":[72],"evaluation":[73,111],"metrics":[74],"from":[75],"public":[77],"leaderboard,":[79],"as":[80,97],"supervisory":[82],"for":[84],"SE.":[85],"This":[86],"approach":[87],"addresses":[88],"key":[90],"limitation":[91],"conventional":[93],"objectives,":[95],"such":[96],"SI-SNR,":[98],"often":[100],"fail":[101],"align":[103],"with":[104],"perceptual":[105],"and":[107,129,147],"generalize":[108],"poorly":[109],"across":[110,140],"metrics.":[112,145],"Moreover,":[113],"it":[114],"enables":[115],"on":[117,126],"realworld":[118],"data":[119],"where":[120],"clean":[121],"references":[122],"unavailable.":[124],"Experiments":[125],"both":[127],"simulated":[128],"real-world":[130],"test":[131],"sets":[132],"show":[133],"SQA-guided":[135],"consistently":[137],"improves":[138],"performance":[139],"Code":[146],"checkpoints":[148],"available<sup":[150],"xmlns:mml=\"http://www.w3.org/1998/Math/MathML\"":[151,153],"xmlns:xlink=\"http://www.w3.org/1999/xlink\">1</sup>.<sup":[152],"xmlns:xlink=\"http://www.w3.org/1999/xlink\">1</sup>https://github.com/urgent-challenge/urgent2026_challenge_track2":[154]},"counts_by_year":[{"year":2026,"cited_by_count":2}],"updated_date":"2026-04-23T09:07:50.710637","created_date":"2026-04-03T00:00:00"}
