{"id":"https://openalex.org/W4403059241","doi":"https://doi.org/10.1109/taslp.2024.3473318","title":"Unsupervised Speech Enhancement Using Optimal Transport and Speech Presence Probability","display_name":"Unsupervised Speech Enhancement Using Optimal Transport and Speech Presence Probability","publication_year":2024,"publication_date":"2024-01-01","ids":{"openalex":"https://openalex.org/W4403059241","doi":"https://doi.org/10.1109/taslp.2024.3473318"},"language":"en","primary_location":{"id":"doi:10.1109/taslp.2024.3473318","is_oa":false,"landing_page_url":"https://doi.org/10.1109/taslp.2024.3473318","pdf_url":null,"source":{"id":"https://openalex.org/S4210169297","display_name":"IEEE/ACM Transactions on Audio Speech and Language Processing","issn_l":"2329-9290","issn":["2329-9290","2329-9304"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE/ACM Transactions on Audio, Speech, and Language Processing","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5101900435","display_name":"Wenbin Jiang","orcid":"https://orcid.org/0000-0002-4063-8952"},"institutions":[{"id":"https://openalex.org/I50760025","display_name":"Hangzhou Dianzi University","ror":"https://ror.org/0576gt767","country_code":"CN","type":"education","lineage":["https://openalex.org/I50760025"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Wenbin Jiang","raw_affiliation_strings":["School of Communication Engineering, Hangzhou Dianzi University, Hangzhou, China"],"raw_orcid":"https://orcid.org/0000-0002-4063-8952","affiliations":[{"raw_affiliation_string":"School of Communication Engineering, Hangzhou Dianzi University, Hangzhou, China","institution_ids":["https://openalex.org/I50760025"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5043098653","display_name":"Kai Yu","orcid":"https://orcid.org/0000-0002-7102-9826"},"institutions":[{"id":"https://openalex.org/I183067930","display_name":"Shanghai Jiao Tong University","ror":"https://ror.org/0220qvk04","country_code":"CN","type":"education","lineage":["https://openalex.org/I183067930"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Kai Yu","raw_affiliation_strings":["X-LANCE Lab, Department of Computer Science and Engineering, Shanghai Jiao Tong University, Shanghai, China"],"raw_orcid":"https://orcid.org/0000-0002-7102-9826","affiliations":[{"raw_affiliation_string":"X-LANCE Lab, Department of Computer Science and Engineering, Shanghai Jiao Tong University, Shanghai, China","institution_ids":["https://openalex.org/I183067930"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5034251794","display_name":"Fei Wen","orcid":"https://orcid.org/0000-0002-3083-9611"},"institutions":[{"id":"https://openalex.org/I183067930","display_name":"Shanghai Jiao Tong University","ror":"https://ror.org/0220qvk04","country_code":"CN","type":"education","lineage":["https://openalex.org/I183067930"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Fei Wen","raw_affiliation_strings":["Department of Electronic Engineering, Shanghai Jiao Tong University, Shanghai, China"],"raw_orcid":"https://orcid.org/0000-0002-3083-9611","affiliations":[{"raw_affiliation_string":"Department of Electronic Engineering, Shanghai Jiao Tong University, Shanghai, China","institution_ids":["https://openalex.org/I183067930"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":1.2229,"has_fulltext":false,"cited_by_count":4,"citation_normalized_percentile":{"value":0.79421273,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":95,"max":99},"biblio":{"volume":"32","issue":null,"first_page":"4445","last_page":"4455"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9991999864578247,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9991999864578247,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T13289","display_name":"Infant Health and Development","score":0.9520999789237976,"subfield":{"id":"https://openalex.org/subfields/3611","display_name":"Pharmacy"},"field":{"id":"https://openalex.org/fields/36","display_name":"Health Professions"},"domain":{"id":"https://openalex.org/domains/4","display_name":"Health Sciences"}},{"id":"https://openalex.org/T10326","display_name":"Indoor and Outdoor Localization Technologies","score":0.9498999714851379,"subfield":{"id":"https://openalex.org/subfields/2208","display_name":"Electrical and Electronic Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.5653978586196899},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.5452131032943726},{"id":"https://openalex.org/keywords/speech-enhancement","display_name":"Speech enhancement","score":0.49404624104499817},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.3002668619155884},{"id":"https://openalex.org/keywords/noise-reduction","display_name":"Noise reduction","score":0.059631168842315674}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.5653978586196899},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.5452131032943726},{"id":"https://openalex.org/C2776182073","wikidata":"https://www.wikidata.org/wiki/Q7575395","display_name":"Speech enhancement","level":3,"score":0.49404624104499817},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.3002668619155884},{"id":"https://openalex.org/C163294075","wikidata":"https://www.wikidata.org/wiki/Q581861","display_name":"Noise reduction","level":2,"score":0.059631168842315674}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/taslp.2024.3473318","is_oa":false,"landing_page_url":"https://doi.org/10.1109/taslp.2024.3473318","pdf_url":null,"source":{"id":"https://openalex.org/S4210169297","display_name":"IEEE/ACM Transactions on Audio Speech and Language Processing","issn_l":"2329-9290","issn":["2329-9290","2329-9304"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE/ACM Transactions on Audio, Speech, and Language Processing","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G4068114840","display_name":null,"funder_award_id":"62271314","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":54,"referenced_works":["https://openalex.org/W1552314771","https://openalex.org/W2051428568","https://openalex.org/W2062164080","https://openalex.org/W2094721231","https://openalex.org/W2121973264","https://openalex.org/W2126942983","https://openalex.org/W2141998673","https://openalex.org/W2146324387","https://openalex.org/W2160815625","https://openalex.org/W2164452299","https://openalex.org/W2169147844","https://openalex.org/W2289394825","https://openalex.org/W2516001803","https://openalex.org/W2603567530","https://openalex.org/W2793477525","https://openalex.org/W2797972631","https://openalex.org/W2889442120","https://openalex.org/W2891759647","https://openalex.org/W2919115771","https://openalex.org/W2937568977","https://openalex.org/W2940275453","https://openalex.org/W2962866211","https://openalex.org/W2963341071","https://openalex.org/W2964058413","https://openalex.org/W2972592847","https://openalex.org/W2998832642","https://openalex.org/W3015199127","https://openalex.org/W3015337486","https://openalex.org/W3096408984","https://openalex.org/W3097906045","https://openalex.org/W3147539069","https://openalex.org/W3160733670","https://openalex.org/W3161273075","https://openalex.org/W3163464523","https://openalex.org/W3205181379","https://openalex.org/W4221143458","https://openalex.org/W4221149546","https://openalex.org/W4224917453","https://openalex.org/W4224925070","https://openalex.org/W4225014220","https://openalex.org/W4232282348","https://openalex.org/W4253928870","https://openalex.org/W4292969786","https://openalex.org/W4296069327","https://openalex.org/W4367281387","https://openalex.org/W4375869466","https://openalex.org/W4383961998","https://openalex.org/W6684191040","https://openalex.org/W6735913928","https://openalex.org/W6757632829","https://openalex.org/W6779669310","https://openalex.org/W6803559545","https://openalex.org/W6847363464","https://openalex.org/W6854977413"],"related_works":["https://openalex.org/W4391375266","https://openalex.org/W2748952813","https://openalex.org/W2390279801","https://openalex.org/W2358668433","https://openalex.org/W4396701345","https://openalex.org/W2376932109","https://openalex.org/W2001405890","https://openalex.org/W3096184950","https://openalex.org/W4231424160","https://openalex.org/W2275432853"],"abstract_inverted_index":{"Speech":[0],"enhancement":[1,69,91],"models":[2],"based":[3,102],"on":[4,103,184,208],"deep":[5],"learning":[6,65,156],"are":[7],"typically":[8],"trained":[9],"in":[10,93],"a":[11,15,33,99,107],"supervised":[12,201,221],"manner,":[13],"requiring":[14],"substantial":[16],"amount":[17],"of":[18,36,51,118,124,173,215],"paired":[19,75],"noisy-to-clean":[20,76],"speech":[21,68,90,105,132,205],"data":[22,29],"for":[23,67,74,153],"training.":[24],"However,":[25],"synthetically":[26],"generated":[27],"training":[28,77,175],"can":[30],"only":[31],"capture":[32],"limited":[34],"range":[35],"realistic":[37],"environments,":[38],"and":[39,53,106,122,140,177],"it":[40],"is":[41],"often":[42],"challenging":[43],"or":[44],"even":[45],"impractical":[46],"to":[47,87,111,169,199],"gather":[48],"real-world":[49],"pairs":[50],"noisy":[52,104],"ground-truth":[54],"clean":[55,126],"speech.":[56,127],"To":[57],"overcome":[58],"this":[59],"limitation,":[60],"we":[61,129],"propose":[62],"an":[63,94,136,150],"unsupervised":[64,95,155],"approach":[66],"that":[70,123,191],"eliminates":[71],"the":[72,83,89,113,116,119,131,142,154,161,166,171,174,179,185,192,200,204,209,213,216],"need":[73],"data.":[78],"Specifically,":[79],"our":[80],"method":[81,194,218],"utilizes":[82],"optimal":[84],"transport":[85],"criterion":[86],"train":[88],"model":[92],"manner.":[96],"It":[97],"employs":[98],"fidelity":[100],"loss":[101,110,148,164,168],"distribution":[108,117],"divergence":[109],"minimize":[112],"difference":[114],"between":[115],"model's":[120],"output":[121],"unpaired":[125],"Further,":[128],"use":[130],"presence":[133],"probability":[134],"as":[135,149,165],"additional":[137],"optimization":[138],"objective":[139],"incorporate":[141],"short-time":[143],"Fourier":[144],"transform":[145],"(STFT)":[146],"domain":[147],"extra":[151],"term":[152],"loss.":[157],"We":[158],"also":[159],"apply":[160],"multi-resolution":[162],"STFT":[163],"validation":[167],"enhance":[170],"stability":[172],"process":[176],"improve":[178],"algorithm's":[180],"performance.":[181],"Experimental":[182],"results":[183,207],"VCTK":[186],"+":[187],"DEMAND":[188],"benchmark":[189,211],"demonstrate":[190],"proposed":[193,217],"achieves":[195],"competitive":[196],"performance":[197],"compared":[198],"methods.":[202],"Furthermore,":[203],"recognition":[206],"CHiME4":[210],"show":[212],"superiority":[214],"over":[219],"its":[220],"counterpart.":[222]},"counts_by_year":[{"year":2026,"cited_by_count":2},{"year":2025,"cited_by_count":2}],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2025-10-10T00:00:00"}
