{"id":"https://openalex.org/W4375869120","doi":"https://doi.org/10.1109/icassp49357.2023.10096064","title":"Cold Diffusion for Speech Enhancement","display_name":"Cold Diffusion for Speech Enhancement","publication_year":2023,"publication_date":"2023-05-05","ids":{"openalex":"https://openalex.org/W4375869120","doi":"https://doi.org/10.1109/icassp49357.2023.10096064"},"language":"en","primary_location":{"id":"doi:10.1109/icassp49357.2023.10096064","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp49357.2023.10096064","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2023 - 2023 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5000242115","display_name":"Hao Yen","orcid":"https://orcid.org/0000-0001-8897-4368"},"institutions":[{"id":"https://openalex.org/I130701444","display_name":"Georgia Institute of Technology","ror":"https://ror.org/01zkghx44","country_code":"US","type":"education","lineage":["https://openalex.org/I130701444"]},{"id":"https://openalex.org/I4210159266","display_name":"Mitsubishi Electric (United States)","ror":"https://ror.org/053jnhe44","country_code":"US","type":"company","lineage":["https://openalex.org/I1306287861","https://openalex.org/I4210133125","https://openalex.org/I4210159266"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Hao Yen","raw_affiliation_strings":["Mitsubishi Electric Research Laboratories (MERL),Cambridge,MA,USA","School of Electrical and Computer Engineering, Georgia Institute of Technology, GA, USA","Mitsubishi Electric Research Laboratories (MERL), Cambridge, MA, USA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Mitsubishi Electric Research Laboratories (MERL),Cambridge,MA,USA","institution_ids":["https://openalex.org/I4210159266"]},{"raw_affiliation_string":"School of Electrical and Computer Engineering, Georgia Institute of Technology, GA, USA","institution_ids":["https://openalex.org/I130701444"]},{"raw_affiliation_string":"Mitsubishi Electric Research Laboratories (MERL), Cambridge, MA, USA","institution_ids":["https://openalex.org/I4210159266"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5102812631","display_name":"Fran\u00e7ois G. Germain","orcid":"https://orcid.org/0000-0002-8973-5315"},"institutions":[{"id":"https://openalex.org/I4210159266","display_name":"Mitsubishi Electric (United States)","ror":"https://ror.org/053jnhe44","country_code":"US","type":"company","lineage":["https://openalex.org/I1306287861","https://openalex.org/I4210133125","https://openalex.org/I4210159266"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Fran\u00e7ois G. Germain","raw_affiliation_strings":["Mitsubishi Electric Research Laboratories (MERL),Cambridge,MA,USA","Mitsubishi Electric Research Laboratories (MERL), Cambridge, MA, USA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Mitsubishi Electric Research Laboratories (MERL),Cambridge,MA,USA","institution_ids":["https://openalex.org/I4210159266"]},{"raw_affiliation_string":"Mitsubishi Electric Research Laboratories (MERL), Cambridge, MA, USA","institution_ids":["https://openalex.org/I4210159266"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5086940921","display_name":"Gordon Wichern","orcid":"https://orcid.org/0000-0002-8597-6795"},"institutions":[{"id":"https://openalex.org/I4210159266","display_name":"Mitsubishi Electric (United States)","ror":"https://ror.org/053jnhe44","country_code":"US","type":"company","lineage":["https://openalex.org/I1306287861","https://openalex.org/I4210133125","https://openalex.org/I4210159266"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Gordon Wichern","raw_affiliation_strings":["Mitsubishi Electric Research Laboratories (MERL),Cambridge,MA,USA","Mitsubishi Electric Research Laboratories (MERL), Cambridge, MA, USA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Mitsubishi Electric Research Laboratories (MERL),Cambridge,MA,USA","institution_ids":["https://openalex.org/I4210159266"]},{"raw_affiliation_string":"Mitsubishi Electric Research Laboratories (MERL), Cambridge, MA, USA","institution_ids":["https://openalex.org/I4210159266"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5064097430","display_name":"Jonathan Le Roux","orcid":"https://orcid.org/0000-0002-0158-2837"},"institutions":[{"id":"https://openalex.org/I4210159266","display_name":"Mitsubishi Electric (United States)","ror":"https://ror.org/053jnhe44","country_code":"US","type":"company","lineage":["https://openalex.org/I1306287861","https://openalex.org/I4210133125","https://openalex.org/I4210159266"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Jonathan Le Roux","raw_affiliation_strings":["Mitsubishi Electric Research Laboratories (MERL),Cambridge,MA,USA","Mitsubishi Electric Research Laboratories (MERL), Cambridge, MA, USA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Mitsubishi Electric Research Laboratories (MERL),Cambridge,MA,USA","institution_ids":["https://openalex.org/I4210159266"]},{"raw_affiliation_string":"Mitsubishi Electric Research Laboratories (MERL), Cambridge, MA, USA","institution_ids":["https://openalex.org/I4210159266"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":8.3143,"has_fulltext":false,"cited_by_count":45,"citation_normalized_percentile":{"value":0.98464777,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":98,"max":100},"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"5"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9984999895095825,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9973000288009644,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7552712559700012},{"id":"https://openalex.org/keywords/speech-enhancement","display_name":"Speech enhancement","score":0.7517474889755249},{"id":"https://openalex.org/keywords/benchmark","display_name":"Benchmark (surveying)","score":0.7304012775421143},{"id":"https://openalex.org/keywords/discriminative-model","display_name":"Discriminative model","score":0.6744786500930786},{"id":"https://openalex.org/keywords/diffusion","display_name":"Diffusion","score":0.6136485934257507},{"id":"https://openalex.org/keywords/sampling","display_name":"Sampling (signal processing)","score":0.5633521676063538},{"id":"https://openalex.org/keywords/process","display_name":"Process (computing)","score":0.49109506607055664},{"id":"https://openalex.org/keywords/diffusion-process","display_name":"Diffusion process","score":0.4613810181617737},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.4481351971626282},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.35033154487609863},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.3421311378479004},{"id":"https://openalex.org/keywords/noise-reduction","display_name":"Noise reduction","score":0.2272275984287262},{"id":"https://openalex.org/keywords/computer-vision","display_name":"Computer vision","score":0.06947138905525208},{"id":"https://openalex.org/keywords/innovation-diffusion","display_name":"Innovation diffusion","score":0.05820870399475098}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7552712559700012},{"id":"https://openalex.org/C2776182073","wikidata":"https://www.wikidata.org/wiki/Q7575395","display_name":"Speech enhancement","level":3,"score":0.7517474889755249},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.7304012775421143},{"id":"https://openalex.org/C97931131","wikidata":"https://www.wikidata.org/wiki/Q5282087","display_name":"Discriminative model","level":2,"score":0.6744786500930786},{"id":"https://openalex.org/C69357855","wikidata":"https://www.wikidata.org/wiki/Q163214","display_name":"Diffusion","level":2,"score":0.6136485934257507},{"id":"https://openalex.org/C140779682","wikidata":"https://www.wikidata.org/wiki/Q210868","display_name":"Sampling (signal processing)","level":3,"score":0.5633521676063538},{"id":"https://openalex.org/C98045186","wikidata":"https://www.wikidata.org/wiki/Q205663","display_name":"Process (computing)","level":2,"score":0.49109506607055664},{"id":"https://openalex.org/C68710425","wikidata":"https://www.wikidata.org/wiki/Q5275442","display_name":"Diffusion process","level":3,"score":0.4613810181617737},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4481351971626282},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.35033154487609863},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.3421311378479004},{"id":"https://openalex.org/C163294075","wikidata":"https://www.wikidata.org/wiki/Q581861","display_name":"Noise reduction","level":2,"score":0.2272275984287262},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.06947138905525208},{"id":"https://openalex.org/C3017618536","wikidata":"https://www.wikidata.org/wiki/Q304994","display_name":"Innovation diffusion","level":2,"score":0.05820870399475098},{"id":"https://openalex.org/C56739046","wikidata":"https://www.wikidata.org/wiki/Q192060","display_name":"Knowledge management","level":1,"score":0.0},{"id":"https://openalex.org/C97355855","wikidata":"https://www.wikidata.org/wiki/Q11473","display_name":"Thermodynamics","level":1,"score":0.0},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0},{"id":"https://openalex.org/C106131492","wikidata":"https://www.wikidata.org/wiki/Q3072260","display_name":"Filter (signal processing)","level":2,"score":0.0},{"id":"https://openalex.org/C205649164","wikidata":"https://www.wikidata.org/wiki/Q1071","display_name":"Geography","level":0,"score":0.0},{"id":"https://openalex.org/C13280743","wikidata":"https://www.wikidata.org/wiki/Q131089","display_name":"Geodesy","level":1,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/icassp49357.2023.10096064","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp49357.2023.10096064","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2023 - 2023 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/10","score":0.6600000262260437,"display_name":"Reduced inequalities"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":39,"referenced_works":["https://openalex.org/W1482149378","https://openalex.org/W1552314771","https://openalex.org/W1677182931","https://openalex.org/W1992475611","https://openalex.org/W2069681747","https://openalex.org/W2078528584","https://openalex.org/W2129069237","https://openalex.org/W2144404214","https://openalex.org/W2291877678","https://openalex.org/W2405774341","https://openalex.org/W2519091744","https://openalex.org/W2594607416","https://openalex.org/W2755891984","https://openalex.org/W2757519008","https://openalex.org/W2765425905","https://openalex.org/W2889442120","https://openalex.org/W2949558265","https://openalex.org/W2952218014","https://openalex.org/W2954275688","https://openalex.org/W2963341071","https://openalex.org/W2972443522","https://openalex.org/W2981497094","https://openalex.org/W3016056257","https://openalex.org/W3016912202","https://openalex.org/W3096408984","https://openalex.org/W3097945073","https://openalex.org/W3099330747","https://openalex.org/W3109018774","https://openalex.org/W3131332223","https://openalex.org/W3160324929","https://openalex.org/W3160567113","https://openalex.org/W3174264304","https://openalex.org/W3211438798","https://openalex.org/W4221144097","https://openalex.org/W4297841790","https://openalex.org/W6629014544","https://openalex.org/W6738884980","https://openalex.org/W6762114000","https://openalex.org/W6769016838"],"related_works":["https://openalex.org/W2965546495","https://openalex.org/W4389116644","https://openalex.org/W2153315159","https://openalex.org/W3103844505","https://openalex.org/W259157601","https://openalex.org/W4205463238","https://openalex.org/W2761785940","https://openalex.org/W2378211422","https://openalex.org/W2110523656","https://openalex.org/W1482209366"],"abstract_inverted_index":{"Diffusion":[0],"models":[1,124],"have":[2],"recently":[3,34],"shown":[4],"promising":[5],"results":[6,105],"for":[7],"difficult":[8],"enhancement":[9,109,127],"tasks":[10],"such":[11],"as":[12],"the":[13,29,56,86,91,113,117],"conditional":[14],"and":[15,21,82,125],"unconditional":[16],"restoration":[17],"of":[18,31,55,116],"natural":[19],"images":[20],"audio":[22],"signals.":[23,50],"In":[24],"this":[25],"work,":[26],"we":[27,76],"explore":[28],"possibility":[30],"leveraging":[32],"a":[33],"proposed":[35,97,118],"advanced":[36],"iterative":[37],"diffusion":[38,61],"model,":[39],"namely":[40],"cold":[41,60],"diffusion,":[42],"to":[43,65,84,121],"recover":[44],"clean":[45],"speech":[46,108],"signals":[47],"from":[48,59,69],"noisy":[49],"The":[51],"unique":[52],"mathematical":[53],"properties":[54],"sampling":[57,92],"process":[58],"could":[62],"be":[63],"utilized":[64],"restore":[66],"high-quality":[67],"samples":[68],"arbitrary":[70],"degradations.":[71],"Based":[72],"on":[73,106],"these":[74],"properties,":[75],"propose":[77],"an":[78],"improved":[79],"training":[80],"algorithm":[81],"objective":[83],"help":[85],"model":[87,102],"generalize":[88],"better":[89],"during":[90],"process.":[93],"We":[94],"verify":[95],"our":[96],"framework":[98],"by":[99],"investigating":[100],"two":[101],"architectures.":[103],"Experimental":[104],"benchmark":[107],"dataset":[110],"VoiceBank-DEMAND":[111],"demonstrate":[112],"strong":[114],"performance":[115],"approach":[119],"compared":[120],"representative":[122],"discriminative":[123],"diffusion-based":[126],"models.":[128]},"counts_by_year":[{"year":2026,"cited_by_count":2},{"year":2025,"cited_by_count":11},{"year":2024,"cited_by_count":24},{"year":2023,"cited_by_count":8}],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2025-10-10T00:00:00"}
