{"id":"https://openalex.org/W4405709216","doi":"https://doi.org/10.1109/iscslp63861.2024.10799970","title":"Fast Sampling Based on Policy Gradient for Diffusion-Based Speech Enhancement","display_name":"Fast Sampling Based on Policy Gradient for Diffusion-Based Speech Enhancement","publication_year":2024,"publication_date":"2024-11-07","ids":{"openalex":"https://openalex.org/W4405709216","doi":"https://doi.org/10.1109/iscslp63861.2024.10799970"},"language":"en","primary_location":{"id":"doi:10.1109/iscslp63861.2024.10799970","is_oa":false,"landing_page_url":"https://doi.org/10.1109/iscslp63861.2024.10799970","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2024 IEEE 14th International Symposium on Chinese Spoken Language Processing (ISCSLP)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5062945787","display_name":"Yubo Jiang","orcid":"https://orcid.org/0000-0002-9211-6558"},"institutions":[{"id":"https://openalex.org/I96908189","display_name":"Xinjiang University","ror":"https://ror.org/059gw8r13","country_code":"CN","type":"education","lineage":["https://openalex.org/I96908189"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Yubo Jiang","raw_affiliation_strings":["School of Computer Science and Technology, Xinjiang University"],"affiliations":[{"raw_affiliation_string":"School of Computer Science and Technology, Xinjiang University","institution_ids":["https://openalex.org/I96908189"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5056424666","display_name":"Zhihua Huang","orcid":"https://orcid.org/0000-0001-5710-5231"},"institutions":[{"id":"https://openalex.org/I96908189","display_name":"Xinjiang University","ror":"https://ror.org/059gw8r13","country_code":"CN","type":"education","lineage":["https://openalex.org/I96908189"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Zhihua Huang","raw_affiliation_strings":["School of Computer Science and Technology, Xinjiang University"],"affiliations":[{"raw_affiliation_string":"School of Computer Science and Technology, Xinjiang University","institution_ids":["https://openalex.org/I96908189"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5062945787"],"corresponding_institution_ids":["https://openalex.org/I96908189"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.26693647,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"576","last_page":"580"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9994999766349792,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9994999766349792,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11233","display_name":"Advanced Adaptive Filtering Techniques","score":0.9793999791145325,"subfield":{"id":"https://openalex.org/subfields/2206","display_name":"Computational Mechanics"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T13289","display_name":"Infant Health and Development","score":0.9677000045776367,"subfield":{"id":"https://openalex.org/subfields/3611","display_name":"Pharmacy"},"field":{"id":"https://openalex.org/fields/36","display_name":"Health Professions"},"domain":{"id":"https://openalex.org/domains/4","display_name":"Health Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/speech-enhancement","display_name":"Speech enhancement","score":0.7167924046516418},{"id":"https://openalex.org/keywords/diffusion","display_name":"Diffusion","score":0.6542747616767883},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6272533535957336},{"id":"https://openalex.org/keywords/sampling","display_name":"Sampling (signal processing)","score":0.6195207238197327},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.42737966775894165},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.25759562849998474},{"id":"https://openalex.org/keywords/telecommunications","display_name":"Telecommunications","score":0.1924501359462738},{"id":"https://openalex.org/keywords/noise-reduction","display_name":"Noise reduction","score":0.11132881045341492},{"id":"https://openalex.org/keywords/physics","display_name":"Physics","score":0.09875082969665527}],"concepts":[{"id":"https://openalex.org/C2776182073","wikidata":"https://www.wikidata.org/wiki/Q7575395","display_name":"Speech enhancement","level":3,"score":0.7167924046516418},{"id":"https://openalex.org/C69357855","wikidata":"https://www.wikidata.org/wiki/Q163214","display_name":"Diffusion","level":2,"score":0.6542747616767883},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6272533535957336},{"id":"https://openalex.org/C140779682","wikidata":"https://www.wikidata.org/wiki/Q210868","display_name":"Sampling (signal processing)","level":3,"score":0.6195207238197327},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.42737966775894165},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.25759562849998474},{"id":"https://openalex.org/C76155785","wikidata":"https://www.wikidata.org/wiki/Q418","display_name":"Telecommunications","level":1,"score":0.1924501359462738},{"id":"https://openalex.org/C163294075","wikidata":"https://www.wikidata.org/wiki/Q581861","display_name":"Noise reduction","level":2,"score":0.11132881045341492},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.09875082969665527},{"id":"https://openalex.org/C94915269","wikidata":"https://www.wikidata.org/wiki/Q1834857","display_name":"Detector","level":2,"score":0.0},{"id":"https://openalex.org/C97355855","wikidata":"https://www.wikidata.org/wiki/Q11473","display_name":"Thermodynamics","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/iscslp63861.2024.10799970","is_oa":false,"landing_page_url":"https://doi.org/10.1109/iscslp63861.2024.10799970","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2024 IEEE 14th International Symposium on Chinese Spoken Language Processing (ISCSLP)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G6160969776","display_name":null,"funder_award_id":"12464060","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":30,"referenced_works":["https://openalex.org/W1552314771","https://openalex.org/W2289394825","https://openalex.org/W2516001803","https://openalex.org/W2603567530","https://openalex.org/W2755577605","https://openalex.org/W2944162086","https://openalex.org/W2952218014","https://openalex.org/W2964058413","https://openalex.org/W2972466499","https://openalex.org/W3016056257","https://openalex.org/W3097945073","https://openalex.org/W4221144097","https://openalex.org/W4232282348","https://openalex.org/W4297841790","https://openalex.org/W4372266968","https://openalex.org/W4372347392","https://openalex.org/W4380434618","https://openalex.org/W4382202691","https://openalex.org/W4384080510","https://openalex.org/W4392903345","https://openalex.org/W6627932998","https://openalex.org/W6741832134","https://openalex.org/W6749927861","https://openalex.org/W6779823529","https://openalex.org/W6783182287","https://openalex.org/W6786375611","https://openalex.org/W6796042156","https://openalex.org/W6848572727","https://openalex.org/W6849663970","https://openalex.org/W6853228856"],"related_works":["https://openalex.org/W4391375266","https://openalex.org/W2899084033","https://openalex.org/W2748952813","https://openalex.org/W2390279801","https://openalex.org/W4391913857","https://openalex.org/W2358668433","https://openalex.org/W4396701345","https://openalex.org/W2376932109","https://openalex.org/W3096184950","https://openalex.org/W4231424160"],"abstract_inverted_index":{"The":[0,102],"effectiveness":[1],"of":[2,104,117],"diffusion-based":[3],"generative":[4,14],"models":[5,40],"in":[6,29,53],"speech":[7],"enhancement":[8,55],"tasks":[9],"has":[10],"been":[11],"reported.":[12],"These":[13],"methods":[15],"can":[16,91],"produce":[17],"high-quality":[18],"audio":[19,44,121],"with":[20],"less":[21],"distortions":[22],"and":[23,47,76,96,131],"outperform":[24],"their":[25],"discriminative":[26],"counterparts,":[27],"especially":[28],"mismatched":[30],"conditions.":[31],"However,":[32],"due":[33],"to":[34,50,135],"the":[35,54,68,78,83,89,115,133],"multi-step":[36],"denoising":[37],"property,":[38],"these":[39,59],"usually":[41],"have":[42],"slower":[43],"generation":[45],"speed":[46],"a":[48,63,72],"tendency":[49],"accumulate":[51],"errors":[52],"process.":[56],"To":[57],"address":[58],"issues,":[60],"we":[61],"propose":[62],"novel":[64],"approach":[65],"that":[66,108],"considers":[67],"reverse":[69],"process":[70],"as":[71],"sequential":[73],"decision-making":[74],"task":[75],"constructs":[77],"distribution":[79],"similarity":[80],"metric":[81],"into":[82],"training":[84],"objective.":[85],"In":[86],"this":[87],"way,":[88],"generator":[90],"learn":[92],"data":[93],"distributions":[94],"well":[95],"explore":[97],"more":[98],"efficient":[99],"sampling":[100],"trajectories.":[101],"results":[103],"our":[105,109],"experiments":[106],"indicate":[107],"proposed":[110],"method":[111],"not":[112],"only":[113],"reduces":[114],"number":[116],"steps":[118],"required":[119],"for":[120],"generation,":[122],"but":[123],"it":[124],"also":[125],"produces":[126],"even":[127],"higher":[128],"quality":[129],"samples":[130],"preserves":[132],"generalizability":[134],"unknown":[136],"noise.":[137]},"counts_by_year":[],"updated_date":"2025-12-21T23:12:01.093139","created_date":"2025-10-10T00:00:00"}
