{"id":"https://openalex.org/W4385822703","doi":"https://doi.org/10.21437/interspeech.2023-1547","title":"Diffiner: A Versatile Diffusion-based Generative Refiner for Speech Enhancement","display_name":"Diffiner: A Versatile Diffusion-based Generative Refiner for Speech Enhancement","publication_year":2023,"publication_date":"2023-08-14","ids":{"openalex":"https://openalex.org/W4385822703","doi":"https://doi.org/10.21437/interspeech.2023-1547"},"language":"en","primary_location":{"id":"doi:10.21437/interspeech.2023-1547","is_oa":false,"landing_page_url":"https://doi.org/10.21437/interspeech.2023-1547","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"INTERSPEECH 2023","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5010892279","display_name":"Ryosuke Sawata","orcid":"https://orcid.org/0000-0003-3230-4335"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Ryosuke Sawata","raw_affiliation_strings":["Sony Group Corporation, Tokyo, Japan"],"affiliations":[{"raw_affiliation_string":"Sony Group Corporation, Tokyo, Japan","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5031225419","display_name":"Naoki Murata","orcid":"https://orcid.org/0000-0001-7418-5173"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Naoki Murata","raw_affiliation_strings":["Sony Research, Tokyo, Japan"],"affiliations":[{"raw_affiliation_string":"Sony Research, Tokyo, Japan","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5078773088","display_name":"Yuhta Takida","orcid":"https://orcid.org/0000-0001-7384-0842"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yuhta Takida","raw_affiliation_strings":["Sony Research, Tokyo, Japan"],"affiliations":[{"raw_affiliation_string":"Sony Research, Tokyo, Japan","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5034556374","display_name":"Toshimitsu Uesaka","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Toshimitsu Uesaka","raw_affiliation_strings":["Sony Research, Tokyo, Japan"],"affiliations":[{"raw_affiliation_string":"Sony Research, Tokyo, Japan","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5084845461","display_name":"Takashi Shibuya","orcid":"https://orcid.org/0000-0002-4277-0164"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Takashi Shibuya","raw_affiliation_strings":["Sony Research, Tokyo, Japan"],"affiliations":[{"raw_affiliation_string":"Sony Research, Tokyo, Japan","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5104117184","display_name":"Shusuke Takahashi","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Shusuke Takahashi","raw_affiliation_strings":["Sony Group Corporation, Tokyo, Japan"],"affiliations":[{"raw_affiliation_string":"Sony Group Corporation, Tokyo, Japan","institution_ids":[]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5088754502","display_name":"Yuki Mitsufuji","orcid":"https://orcid.org/0000-0002-6806-6140"},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Yuki Mitsufuji","raw_affiliation_strings":["Sony Research, Tokyo, Japan"],"affiliations":[{"raw_affiliation_string":"Sony Research, Tokyo, Japan","institution_ids":[]}]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":7,"corresponding_author_ids":["https://openalex.org/A5088754502"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":2.1136,"has_fulltext":false,"cited_by_count":11,"citation_normalized_percentile":{"value":0.88236749,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":94,"max":98},"biblio":{"volume":null,"issue":null,"first_page":"3824","last_page":"3828"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9991999864578247,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9991999864578247,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.989799976348877,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T13289","display_name":"Infant Health and Development","score":0.9592000246047974,"subfield":{"id":"https://openalex.org/subfields/3611","display_name":"Pharmacy"},"field":{"id":"https://openalex.org/fields/36","display_name":"Health Professions"},"domain":{"id":"https://openalex.org/domains/4","display_name":"Health Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6858518719673157},{"id":"https://openalex.org/keywords/diffusion","display_name":"Diffusion","score":0.6181381940841675},{"id":"https://openalex.org/keywords/generative-grammar","display_name":"Generative grammar","score":0.5076297521591187},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.484611839056015},{"id":"https://openalex.org/keywords/generative-model","display_name":"Generative model","score":0.415652871131897},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.3411995470523834},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.33199775218963623},{"id":"https://openalex.org/keywords/physics","display_name":"Physics","score":0.07302689552307129}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6858518719673157},{"id":"https://openalex.org/C69357855","wikidata":"https://www.wikidata.org/wiki/Q163214","display_name":"Diffusion","level":2,"score":0.6181381940841675},{"id":"https://openalex.org/C39890363","wikidata":"https://www.wikidata.org/wiki/Q36108","display_name":"Generative grammar","level":2,"score":0.5076297521591187},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.484611839056015},{"id":"https://openalex.org/C167966045","wikidata":"https://www.wikidata.org/wiki/Q5532625","display_name":"Generative model","level":3,"score":0.415652871131897},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.3411995470523834},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.33199775218963623},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.07302689552307129},{"id":"https://openalex.org/C97355855","wikidata":"https://www.wikidata.org/wiki/Q11473","display_name":"Thermodynamics","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.21437/interspeech.2023-1547","is_oa":false,"landing_page_url":"https://doi.org/10.21437/interspeech.2023-1547","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"INTERSPEECH 2023","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":43,"referenced_works":["https://openalex.org/W1510355813","https://openalex.org/W1522301498","https://openalex.org/W1552314771","https://openalex.org/W1959608418","https://openalex.org/W2067295501","https://openalex.org/W2094721231","https://openalex.org/W2397728357","https://openalex.org/W2516001803","https://openalex.org/W2603567530","https://openalex.org/W2749337587","https://openalex.org/W2805233667","https://openalex.org/W2889530575","https://openalex.org/W2922332774","https://openalex.org/W2937998856","https://openalex.org/W2949558265","https://openalex.org/W2949756029","https://openalex.org/W2952218014","https://openalex.org/W2963090522","https://openalex.org/W2963341071","https://openalex.org/W2963452667","https://openalex.org/W2964058413","https://openalex.org/W2972412503","https://openalex.org/W3036167779","https://openalex.org/W3105013723","https://openalex.org/W3121370741","https://openalex.org/W3127136429","https://openalex.org/W3157804362","https://openalex.org/W3160085755","https://openalex.org/W3162926177","https://openalex.org/W3163827866","https://openalex.org/W3168053944","https://openalex.org/W3184410885","https://openalex.org/W3197284240","https://openalex.org/W4200635400","https://openalex.org/W4221144097","https://openalex.org/W4225302959","https://openalex.org/W4225759859","https://openalex.org/W4281820413","https://openalex.org/W4297841790","https://openalex.org/W4302759964","https://openalex.org/W4320013936","https://openalex.org/W4372341629","https://openalex.org/W4380434618"],"related_works":["https://openalex.org/W4365211920","https://openalex.org/W2789919619","https://openalex.org/W2293457016","https://openalex.org/W3169305685","https://openalex.org/W4303649048","https://openalex.org/W2100833569","https://openalex.org/W4386566512","https://openalex.org/W3215037513","https://openalex.org/W2026477238","https://openalex.org/W4289915735"],"abstract_inverted_index":{"Although":[0],"deep":[1],"neural":[2],"network":[3],"(DNN)-based":[4],"speech":[5,37,56,137],"enhancement":[6],"(SE)":[7],"methods":[8,103,121,144],"outperform":[9],"the":[10,17,71,141],"previous":[11],"non-DNN-based":[12],"ones,":[13],"they":[14],"often":[15],"degrade":[16],"perceptual":[18,36,136],"quality":[19,38,138],"of":[20,54,93,128,140],"generated":[21,65],"outputs.To":[22],"tackle":[23],"this":[24],"problem,":[25],"we":[26],"introduce":[27],"a":[28,45,51,78,91,116],"DNN-based":[29],"generative":[30,47],"refiner,":[31],"Diffiner,":[32],"aiming":[33],"to":[34,100],"improve":[35],"pre-processed":[39],"by":[40,49,77],"an":[41],"SE":[42,80,102,110,143],"method.We":[43],"train":[44],"diffusionbased":[46],"model":[48],"utilizing":[50],"dataset":[52],"consisting":[53],"clean":[55,62,94],"only.Then,":[57],"our":[58,86,112,133],"refiner":[59,87,113],"effectively":[60],"mixes":[61],"parts":[63,75],"newly":[64],"via":[66],"denoising":[67],"diffusion":[68],"restoration":[69],"into":[70],"degraded":[72],"and":[73,122],"distorted":[74],"caused":[76],"preceding":[79,142],"method,":[81],"resulting":[82],"in":[83,126],"refined":[84],"speech.Once":[85],"is":[88,147],"trained":[89],"on":[90],"set":[92],"speech,":[95],"it":[96],"can":[97,114],"be":[98,115],"applied":[99],"various":[101],"without":[104],"additional":[105],"training":[106],"specialized":[107],"for":[108],"each":[109],"module.Therefore,":[111],"versatile":[117],"post-processing":[118],"module":[119],"w.r.t.SE":[120],"has":[123],"high":[124],"potential":[125],"terms":[127],"modularity.Experimental":[129],"results":[130],"show":[131],"that":[132],"method":[134],"improved":[135],"regardless":[139],"used.Our":[145],"code":[146],"available":[148],"at":[149],"https://github.com/sony/diffiner.":[150]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":2},{"year":2024,"cited_by_count":6},{"year":2023,"cited_by_count":2}],"updated_date":"2026-04-23T09:07:50.710637","created_date":"2023-08-15T00:00:00"}
