{"id":"https://openalex.org/W7156784025","doi":"https://doi.org/10.48550/arxiv.2604.24199","title":"Speech Enhancement Based on Drifting Models","display_name":"Speech Enhancement Based on Drifting Models","publication_year":2026,"publication_date":"2026-04-27","ids":{"openalex":"https://openalex.org/W7156784025","doi":"https://doi.org/10.48550/arxiv.2604.24199"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2604.24199","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.24199","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2604.24199","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5134756602","display_name":"Liang Xu","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Xu, Liang","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5049890035","display_name":"Diego Caviedes-Nozal","orcid":"https://orcid.org/0000-0001-6756-3375"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Caviedes-Nozal, Diego","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5134778832","display_name":"Bastiaan Kleijn","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Kleijn, W. Bastiaan","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5064315768","display_name":"Longfei Yan","orcid":"https://orcid.org/0000-0003-4273-198X"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yan, Longfei Felix","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5006771634","display_name":"Rasmus Kongsgaard Olsson","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Olsson, Rasmus Kongsgaard","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5134756602"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9842000007629395,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9842000007629395,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.00430000014603138,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11233","display_name":"Advanced Adaptive Filtering Techniques","score":0.0020000000949949026,"subfield":{"id":"https://openalex.org/subfields/2206","display_name":"Computational Mechanics"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/speech-enhancement","display_name":"Speech enhancement","score":0.8007000088691711},{"id":"https://openalex.org/keywords/benchmark","display_name":"Benchmark (surveying)","score":0.6040999889373779},{"id":"https://openalex.org/keywords/inference","display_name":"Inference","score":0.5565999746322632},{"id":"https://openalex.org/keywords/generative-model","display_name":"Generative model","score":0.5245000123977661},{"id":"https://openalex.org/keywords/matching","display_name":"Matching (statistics)","score":0.48420000076293945},{"id":"https://openalex.org/keywords/conditional-random-field","display_name":"Conditional random field","score":0.46939998865127563},{"id":"https://openalex.org/keywords/noise-reduction","display_name":"Noise reduction","score":0.4675000011920929},{"id":"https://openalex.org/keywords/gaussian","display_name":"Gaussian","score":0.39590001106262207},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.38359999656677246}],"concepts":[{"id":"https://openalex.org/C2776182073","wikidata":"https://www.wikidata.org/wiki/Q7575395","display_name":"Speech enhancement","level":3,"score":0.8007000088691711},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6448000073432922},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.6040999889373779},{"id":"https://openalex.org/C2776214188","wikidata":"https://www.wikidata.org/wiki/Q408386","display_name":"Inference","level":2,"score":0.5565999746322632},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5562000274658203},{"id":"https://openalex.org/C167966045","wikidata":"https://www.wikidata.org/wiki/Q5532625","display_name":"Generative model","level":3,"score":0.5245000123977661},{"id":"https://openalex.org/C165064840","wikidata":"https://www.wikidata.org/wiki/Q1321061","display_name":"Matching (statistics)","level":2,"score":0.48420000076293945},{"id":"https://openalex.org/C152565575","wikidata":"https://www.wikidata.org/wiki/Q1124538","display_name":"Conditional random field","level":2,"score":0.46939998865127563},{"id":"https://openalex.org/C163294075","wikidata":"https://www.wikidata.org/wiki/Q581861","display_name":"Noise reduction","level":2,"score":0.4675000011920929},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.445499986410141},{"id":"https://openalex.org/C163716315","wikidata":"https://www.wikidata.org/wiki/Q901177","display_name":"Gaussian","level":2,"score":0.39590001106262207},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.38359999656677246},{"id":"https://openalex.org/C61328038","wikidata":"https://www.wikidata.org/wiki/Q3358061","display_name":"Speech processing","level":2,"score":0.3763999938964844},{"id":"https://openalex.org/C14036430","wikidata":"https://www.wikidata.org/wiki/Q3736076","display_name":"Function (biology)","level":2,"score":0.37549999356269836},{"id":"https://openalex.org/C99498987","wikidata":"https://www.wikidata.org/wiki/Q2210247","display_name":"Noise (video)","level":3,"score":0.3732999861240387},{"id":"https://openalex.org/C39890363","wikidata":"https://www.wikidata.org/wiki/Q36108","display_name":"Generative grammar","level":2,"score":0.36390000581741333},{"id":"https://openalex.org/C2775953691","wikidata":"https://www.wikidata.org/wiki/Q5013874","display_name":"CRFS","level":3,"score":0.3343000113964081},{"id":"https://openalex.org/C61224824","wikidata":"https://www.wikidata.org/wiki/Q2260434","display_name":"Mixture model","level":2,"score":0.32659998536109924},{"id":"https://openalex.org/C61326573","wikidata":"https://www.wikidata.org/wiki/Q1496376","display_name":"Gaussian process","level":3,"score":0.30379998683929443},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.3005000054836273},{"id":"https://openalex.org/C29265498","wikidata":"https://www.wikidata.org/wiki/Q7047719","display_name":"Noise measurement","level":3,"score":0.2989000082015991},{"id":"https://openalex.org/C100675267","wikidata":"https://www.wikidata.org/wiki/Q1371624","display_name":"Background noise","level":2,"score":0.295199990272522},{"id":"https://openalex.org/C2780440489","wikidata":"https://www.wikidata.org/wiki/Q5227278","display_name":"Data-driven","level":2,"score":0.2888000011444092},{"id":"https://openalex.org/C51632099","wikidata":"https://www.wikidata.org/wiki/Q3985153","display_name":"Training set","level":2,"score":0.2883000075817108},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.2793999910354614},{"id":"https://openalex.org/C111335779","wikidata":"https://www.wikidata.org/wiki/Q3454686","display_name":"Reduction (mathematics)","level":2,"score":0.2535000145435333}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2604.24199","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.24199","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2604.24199","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.24199","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/16","display_name":"Peace, Justice and strong institutions","score":0.6935514807701111}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"We":[0,84],"propose":[1],"Speech":[2],"Enhancement":[3],"based":[4],"on":[5,23,74,109],"Drifting":[6,53],"Models":[7],"(DriftSE),":[8],"a":[9,37,52,55,91,99,105,120,129],"novel":[10],"generative":[11,102],"framework":[12,87],"that":[13,59,114],"formulates":[14],"denoising":[15],"as":[16],"an":[17],"equilibrium":[18],"problem.":[19],"Rather":[20],"than":[21,81],"relying":[22],"iterative":[24],"sampling,":[25],"DriftSE":[26,115],"natively":[27],"achieves":[28,116],"one-step":[29],"inference":[30],"by":[31,51,77],"evolving":[32],"the":[33,43,63,67,86,95,110],"pushforward":[34],"distribution":[35],"of":[36,66],"mapping":[38,93],"function":[39],"to":[40],"directly":[41],"match":[42],"clean":[44,68],"speech":[45,133],"distribution.":[46],"This":[47],"evolution":[48],"is":[49],"driven":[50],"Field,":[54],"learned":[56],"correction":[57],"vector":[58],"guides":[60],"samples":[61],"toward":[62],"high-density":[64],"regions":[65],"distribution,":[69],"which":[70],"naturally":[71],"facilitates":[72],"training":[73],"unpaired":[75],"data":[76],"matching":[78],"distributions":[79],"rather":[80],"paired":[82],"samples.":[83],"investigate":[85],"under":[88],"two":[89],"formulations:":[90],"direct":[92],"from":[94,104],"noisy":[96],"observation,":[97],"and":[98,127],"stochastic":[100],"conditional":[101],"model":[103],"Gaussian":[106],"prior.":[107],"Experiments":[108],"VoiceBank-DEMAND":[111],"benchmark":[112],"demonstrate":[113],"high-fidelity":[117],"enhancement":[118],"in":[119],"single":[121],"step,":[122],"outperforming":[123],"multi-step":[124],"diffusion":[125],"baselines":[126],"establishing":[128],"new":[130],"paradigm":[131],"for":[132],"enhancement.":[134]},"counts_by_year":[],"updated_date":"2026-05-22T06:13:13.366637","created_date":"2026-04-29T00:00:00"}
