{"id":"https://openalex.org/W4392903345","doi":"https://doi.org/10.1109/icassp48485.2024.10447860","title":"Single and Few-Step Diffusion for Generative Speech Enhancement","display_name":"Single and Few-Step Diffusion for Generative Speech Enhancement","publication_year":2024,"publication_date":"2024-03-18","ids":{"openalex":"https://openalex.org/W4392903345","doi":"https://doi.org/10.1109/icassp48485.2024.10447860"},"language":"en","primary_location":{"id":"doi:10.1109/icassp48485.2024.10447860","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp48485.2024.10447860","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2024 - 2024 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5027458841","display_name":"Bunlong Lay","orcid":"https://orcid.org/0000-0002-0847-7896"},"institutions":[{"id":"https://openalex.org/I884043246","display_name":"Hamburg University of Technology","ror":"https://ror.org/04bs1pb34","country_code":"DE","type":"education","lineage":["https://openalex.org/I884043246"]},{"id":"https://openalex.org/I159176309","display_name":"Universit\u00e4t Hamburg","ror":"https://ror.org/00g30e956","country_code":"DE","type":"education","lineage":["https://openalex.org/I159176309"]},{"id":"https://openalex.org/I4210136595","display_name":"Hamburg Institut (Germany)","ror":"https://ror.org/03t0n2419","country_code":"DE","type":"company","lineage":["https://openalex.org/I4210136595"]}],"countries":["DE"],"is_corresponding":true,"raw_author_name":"Bunlong Lay","raw_affiliation_strings":["University Hamburg,Signal Processing,Hamburg,Germany","Signal Processing, University Hamburg, Hamburg, Germany"],"affiliations":[{"raw_affiliation_string":"University Hamburg,Signal Processing,Hamburg,Germany","institution_ids":["https://openalex.org/I159176309","https://openalex.org/I884043246","https://openalex.org/I4210136595"]},{"raw_affiliation_string":"Signal Processing, University Hamburg, Hamburg, Germany","institution_ids":["https://openalex.org/I159176309"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5094178929","display_name":"Jean-Marie Lermercier","orcid":null},"institutions":[{"id":"https://openalex.org/I159176309","display_name":"Universit\u00e4t Hamburg","ror":"https://ror.org/00g30e956","country_code":"DE","type":"education","lineage":["https://openalex.org/I159176309"]},{"id":"https://openalex.org/I4210136595","display_name":"Hamburg Institut (Germany)","ror":"https://ror.org/03t0n2419","country_code":"DE","type":"company","lineage":["https://openalex.org/I4210136595"]},{"id":"https://openalex.org/I884043246","display_name":"Hamburg University of Technology","ror":"https://ror.org/04bs1pb34","country_code":"DE","type":"education","lineage":["https://openalex.org/I884043246"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Jean-Marie Lermercier","raw_affiliation_strings":["University Hamburg,Signal Processing,Hamburg,Germany","Signal Processing, University Hamburg, Hamburg, Germany"],"affiliations":[{"raw_affiliation_string":"University Hamburg,Signal Processing,Hamburg,Germany","institution_ids":["https://openalex.org/I159176309","https://openalex.org/I884043246","https://openalex.org/I4210136595"]},{"raw_affiliation_string":"Signal Processing, University Hamburg, Hamburg, Germany","institution_ids":["https://openalex.org/I159176309"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5087017732","display_name":"Julius Richter","orcid":"https://orcid.org/0000-0002-7870-4839"},"institutions":[{"id":"https://openalex.org/I4210136595","display_name":"Hamburg Institut (Germany)","ror":"https://ror.org/03t0n2419","country_code":"DE","type":"company","lineage":["https://openalex.org/I4210136595"]},{"id":"https://openalex.org/I159176309","display_name":"Universit\u00e4t Hamburg","ror":"https://ror.org/00g30e956","country_code":"DE","type":"education","lineage":["https://openalex.org/I159176309"]},{"id":"https://openalex.org/I884043246","display_name":"Hamburg University of Technology","ror":"https://ror.org/04bs1pb34","country_code":"DE","type":"education","lineage":["https://openalex.org/I884043246"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Julius Richter","raw_affiliation_strings":["University Hamburg,Signal Processing,Hamburg,Germany","Signal Processing, University Hamburg, Hamburg, Germany"],"affiliations":[{"raw_affiliation_string":"University Hamburg,Signal Processing,Hamburg,Germany","institution_ids":["https://openalex.org/I159176309","https://openalex.org/I884043246","https://openalex.org/I4210136595"]},{"raw_affiliation_string":"Signal Processing, University Hamburg, Hamburg, Germany","institution_ids":["https://openalex.org/I159176309"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5087022569","display_name":"Timo Gerkmann","orcid":"https://orcid.org/0000-0002-8678-4699"},"institutions":[{"id":"https://openalex.org/I4210136595","display_name":"Hamburg Institut (Germany)","ror":"https://ror.org/03t0n2419","country_code":"DE","type":"company","lineage":["https://openalex.org/I4210136595"]},{"id":"https://openalex.org/I884043246","display_name":"Hamburg University of Technology","ror":"https://ror.org/04bs1pb34","country_code":"DE","type":"education","lineage":["https://openalex.org/I884043246"]},{"id":"https://openalex.org/I159176309","display_name":"Universit\u00e4t Hamburg","ror":"https://ror.org/00g30e956","country_code":"DE","type":"education","lineage":["https://openalex.org/I159176309"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Timo Gerkmann","raw_affiliation_strings":["University Hamburg,Signal Processing,Hamburg,Germany","Signal Processing, University Hamburg, Hamburg, Germany"],"affiliations":[{"raw_affiliation_string":"University Hamburg,Signal Processing,Hamburg,Germany","institution_ids":["https://openalex.org/I159176309","https://openalex.org/I884043246","https://openalex.org/I4210136595"]},{"raw_affiliation_string":"Signal Processing, University Hamburg, Hamburg, Germany","institution_ids":["https://openalex.org/I159176309"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5027458841"],"corresponding_institution_ids":["https://openalex.org/I159176309","https://openalex.org/I4210136595","https://openalex.org/I884043246"],"apc_list":null,"apc_paid":null,"fwci":5.6255,"has_fulltext":false,"cited_by_count":15,"citation_normalized_percentile":{"value":0.96637756,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":98,"max":99},"biblio":{"volume":null,"issue":null,"first_page":"626","last_page":"630"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9984999895095825,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9976000189781189,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6937736868858337},{"id":"https://openalex.org/keywords/inference","display_name":"Inference","score":0.5499491095542908},{"id":"https://openalex.org/keywords/diffusion-process","display_name":"Diffusion process","score":0.5154231786727905},{"id":"https://openalex.org/keywords/diffusion","display_name":"Diffusion","score":0.5069261193275452},{"id":"https://openalex.org/keywords/speech-enhancement","display_name":"Speech enhancement","score":0.4691198170185089},{"id":"https://openalex.org/keywords/discretization","display_name":"Discretization","score":0.4472019076347351},{"id":"https://openalex.org/keywords/matching","display_name":"Matching (statistics)","score":0.44209542870521545},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.4252516031265259},{"id":"https://openalex.org/keywords/baseline","display_name":"Baseline (sea)","score":0.42412522435188293},{"id":"https://openalex.org/keywords/process","display_name":"Process (computing)","score":0.4236524701118469},{"id":"https://openalex.org/keywords/function","display_name":"Function (biology)","score":0.41041380167007446},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.3956528604030609},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.33616015315055847},{"id":"https://openalex.org/keywords/noise-reduction","display_name":"Noise reduction","score":0.3242866098880768},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.2237672507762909},{"id":"https://openalex.org/keywords/statistics","display_name":"Statistics","score":0.12866178154945374}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6937736868858337},{"id":"https://openalex.org/C2776214188","wikidata":"https://www.wikidata.org/wiki/Q408386","display_name":"Inference","level":2,"score":0.5499491095542908},{"id":"https://openalex.org/C68710425","wikidata":"https://www.wikidata.org/wiki/Q5275442","display_name":"Diffusion process","level":3,"score":0.5154231786727905},{"id":"https://openalex.org/C69357855","wikidata":"https://www.wikidata.org/wiki/Q163214","display_name":"Diffusion","level":2,"score":0.5069261193275452},{"id":"https://openalex.org/C2776182073","wikidata":"https://www.wikidata.org/wiki/Q7575395","display_name":"Speech enhancement","level":3,"score":0.4691198170185089},{"id":"https://openalex.org/C73000952","wikidata":"https://www.wikidata.org/wiki/Q17007827","display_name":"Discretization","level":2,"score":0.4472019076347351},{"id":"https://openalex.org/C165064840","wikidata":"https://www.wikidata.org/wiki/Q1321061","display_name":"Matching (statistics)","level":2,"score":0.44209542870521545},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.4252516031265259},{"id":"https://openalex.org/C12725497","wikidata":"https://www.wikidata.org/wiki/Q810247","display_name":"Baseline (sea)","level":2,"score":0.42412522435188293},{"id":"https://openalex.org/C98045186","wikidata":"https://www.wikidata.org/wiki/Q205663","display_name":"Process (computing)","level":2,"score":0.4236524701118469},{"id":"https://openalex.org/C14036430","wikidata":"https://www.wikidata.org/wiki/Q3736076","display_name":"Function (biology)","level":2,"score":0.41041380167007446},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.3956528604030609},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.33616015315055847},{"id":"https://openalex.org/C163294075","wikidata":"https://www.wikidata.org/wiki/Q581861","display_name":"Noise reduction","level":2,"score":0.3242866098880768},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.2237672507762909},{"id":"https://openalex.org/C105795698","wikidata":"https://www.wikidata.org/wiki/Q12483","display_name":"Statistics","level":1,"score":0.12866178154945374},{"id":"https://openalex.org/C127313418","wikidata":"https://www.wikidata.org/wiki/Q1069","display_name":"Geology","level":0,"score":0.0},{"id":"https://openalex.org/C56739046","wikidata":"https://www.wikidata.org/wiki/Q192060","display_name":"Knowledge management","level":1,"score":0.0},{"id":"https://openalex.org/C78458016","wikidata":"https://www.wikidata.org/wiki/Q840400","display_name":"Evolutionary biology","level":1,"score":0.0},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0},{"id":"https://openalex.org/C111368507","wikidata":"https://www.wikidata.org/wiki/Q43518","display_name":"Oceanography","level":1,"score":0.0},{"id":"https://openalex.org/C97355855","wikidata":"https://www.wikidata.org/wiki/Q11473","display_name":"Thermodynamics","level":1,"score":0.0},{"id":"https://openalex.org/C134306372","wikidata":"https://www.wikidata.org/wiki/Q7754","display_name":"Mathematical analysis","level":1,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C3017618536","wikidata":"https://www.wikidata.org/wiki/Q304994","display_name":"Innovation diffusion","level":2,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/icassp48485.2024.10447860","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp48485.2024.10447860","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2024 - 2024 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":28,"referenced_works":["https://openalex.org/W1552314771","https://openalex.org/W1594563152","https://openalex.org/W1657347807","https://openalex.org/W1991111872","https://openalex.org/W2013035813","https://openalex.org/W2135284480","https://openalex.org/W2289394825","https://openalex.org/W2603567530","https://openalex.org/W2885308148","https://openalex.org/W2952218014","https://openalex.org/W2962866211","https://openalex.org/W2964058413","https://openalex.org/W3099330747","https://openalex.org/W4212774754","https://openalex.org/W4221144097","https://openalex.org/W4297841790","https://openalex.org/W4309362373","https://openalex.org/W4372268522","https://openalex.org/W4375869120","https://openalex.org/W4380434618","https://openalex.org/W4384080510","https://openalex.org/W4385823353","https://openalex.org/W6765775151","https://openalex.org/W6778946027","https://openalex.org/W6779823529","https://openalex.org/W6786375611","https://openalex.org/W6798721538","https://openalex.org/W6890361730"],"related_works":["https://openalex.org/W2006251942","https://openalex.org/W2364741597","https://openalex.org/W2383111961","https://openalex.org/W2365952365","https://openalex.org/W1492103595","https://openalex.org/W2352448290","https://openalex.org/W1864774435","https://openalex.org/W2380820513","https://openalex.org/W2913146933","https://openalex.org/W2372385138"],"abstract_inverted_index":{"Diffusion":[0],"models":[1],"have":[2],"shown":[3],"promising":[4],"results":[5,48],"in":[6,49,188],"single-channel":[7],"speech":[8,21,117],"enhancement,":[9],"using":[10,88,119,126,140],"a":[11,23,50,72,120,178],"task-adapted":[12],"diffusion":[13,83,156,186],"process":[14,53,108],"for":[15,34],"the":[16,30,43,61,77,82,85,89,96,101,106,111,115,133,137,151,162,185],"conditional":[17],"generation":[18],"of":[19,146,153,164],"clean":[20,116],"given":[22],"noisy":[24],"mixture.":[25],"However,":[26],"at":[27],"test":[28],"time,":[29],"neural":[31],"network":[32],"used":[33],"score":[35,92],"estimation":[36],"is":[37],"called":[38],"multiple":[39],"times":[40],"to":[41,114,167],"solve":[42],"iterative":[44],"reverse":[45,107],"process.":[46],"This":[47],"slow":[51],"inference":[52],"and":[54,109,181,191],"causes":[55],"discretization":[56],"errors":[57],"that":[58,125,173],"accumulate":[59],"over":[60],"sampling":[62],"trajectory.":[63],"In":[64,76,95],"this":[65,127,189],"paper,":[66],"we":[67,80,99,171],"address":[68],"these":[69],"limitations":[70],"through":[71],"two-stage":[73],"training":[74,129],"approach.":[75],"first":[78],"stage,":[79,98],"train":[81],"model":[84,139],"usual":[86,154],"way":[87],"generative":[90,155],"denoising":[91],"matching":[93],"loss.":[94,122],"second":[97,128],"compute":[100],"enhanced":[102],"signal":[103],"by":[104],"solving":[105],"compare":[110],"resulting":[112],"estimate":[113],"target":[118],"predictive":[121,197],"We":[123],"show":[124,172],"stage":[130],"enables":[131],"achieving":[132],"same":[134],"performance":[135,152,180],"as":[136],"baseline":[138,187],"only":[141],"5":[142],"function":[143,148,165],"evaluations":[144,166],"instead":[145],"60":[147],"evaluations.":[149],"While":[150],"algorithms":[157],"drops":[158],"dramatically":[159],"when":[160],"lowering":[161],"number":[163],"obtain":[168],"single-step":[169],"diffusion,":[170],"our":[174],"proposed":[175],"method":[176],"keeps":[177],"steady":[179],"therefore":[182],"largely":[183],"outperforms":[184],"setting":[190],"also":[192],"generalizes":[193],"better":[194],"than":[195],"its":[196],"counterpart":[198],"<sup":[199],"xmlns:mml=\"http://www.w3.org/1998/Math/MathML\"":[200],"xmlns:xlink=\"http://www.w3.org/1999/xlink\">1</sup>":[201],".":[202]},"counts_by_year":[{"year":2025,"cited_by_count":9},{"year":2024,"cited_by_count":6}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
