{"id":"https://openalex.org/W4367281387","doi":"https://doi.org/10.1109/taslp.2023.3271151","title":"Speech Enhancement With Integration of Neural Homomorphic Synthesis and Spectral Masking","display_name":"Speech Enhancement With Integration of Neural Homomorphic Synthesis and Spectral Masking","publication_year":2023,"publication_date":"2023-01-01","ids":{"openalex":"https://openalex.org/W4367281387","doi":"https://doi.org/10.1109/taslp.2023.3271151"},"language":"en","primary_location":{"id":"doi:10.1109/taslp.2023.3271151","is_oa":false,"landing_page_url":"https://doi.org/10.1109/taslp.2023.3271151","pdf_url":null,"source":{"id":"https://openalex.org/S4210169297","display_name":"IEEE/ACM Transactions on Audio Speech and Language Processing","issn_l":"2329-9290","issn":["2329-9290","2329-9304"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE/ACM Transactions on Audio, Speech, and Language Processing","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5101900435","display_name":"Wenbin Jiang","orcid":"https://orcid.org/0000-0002-4063-8952"},"institutions":[{"id":"https://openalex.org/I183067930","display_name":"Shanghai Jiao Tong University","ror":"https://ror.org/0220qvk04","country_code":"CN","type":"education","lineage":["https://openalex.org/I183067930"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Wenbin Jiang","raw_affiliation_strings":["Department of Computer Science and Engineering, X-LANCE Lab, Shanghai Jiao Tong University, Shanghai, China"],"raw_orcid":"https://orcid.org/0000-0002-4063-8952","affiliations":[{"raw_affiliation_string":"Department of Computer Science and Engineering, X-LANCE Lab, Shanghai Jiao Tong University, Shanghai, China","institution_ids":["https://openalex.org/I183067930"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5043098653","display_name":"Kai Yu","orcid":"https://orcid.org/0000-0002-7102-9826"},"institutions":[{"id":"https://openalex.org/I183067930","display_name":"Shanghai Jiao Tong University","ror":"https://ror.org/0220qvk04","country_code":"CN","type":"education","lineage":["https://openalex.org/I183067930"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Kai Yu","raw_affiliation_strings":["Department of Computer Science and Engineering, X-LANCE Lab, Shanghai Jiao Tong University, Shanghai, China"],"raw_orcid":"https://orcid.org/0000-0002-7102-9826","affiliations":[{"raw_affiliation_string":"Department of Computer Science and Engineering, X-LANCE Lab, Shanghai Jiao Tong University, Shanghai, China","institution_ids":["https://openalex.org/I183067930"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":2,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":2.4019,"has_fulltext":false,"cited_by_count":13,"citation_normalized_percentile":{"value":0.89569147,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":98,"max":99},"biblio":{"volume":"31","issue":null,"first_page":"1758","last_page":"1770"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9952999949455261,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11233","display_name":"Advanced Adaptive Filtering Techniques","score":0.9927999973297119,"subfield":{"id":"https://openalex.org/subfields/2206","display_name":"Computational Mechanics"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.759695291519165},{"id":"https://openalex.org/keywords/speech-enhancement","display_name":"Speech enhancement","score":0.7303629517555237},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.7169532179832458},{"id":"https://openalex.org/keywords/intelligibility","display_name":"Intelligibility (philosophy)","score":0.553106963634491},{"id":"https://openalex.org/keywords/speech-coding","display_name":"Speech coding","score":0.5166945457458496},{"id":"https://openalex.org/keywords/artificial-neural-network","display_name":"Artificial neural network","score":0.4719744622707367},{"id":"https://openalex.org/keywords/speech-processing","display_name":"Speech processing","score":0.43500077724456787},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.37867358326911926},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.3207188844680786},{"id":"https://openalex.org/keywords/noise-reduction","display_name":"Noise reduction","score":0.16266301274299622}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.759695291519165},{"id":"https://openalex.org/C2776182073","wikidata":"https://www.wikidata.org/wiki/Q7575395","display_name":"Speech enhancement","level":3,"score":0.7303629517555237},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.7169532179832458},{"id":"https://openalex.org/C60048801","wikidata":"https://www.wikidata.org/wiki/Q1433889","display_name":"Intelligibility (philosophy)","level":2,"score":0.553106963634491},{"id":"https://openalex.org/C13895895","wikidata":"https://www.wikidata.org/wiki/Q3270773","display_name":"Speech coding","level":2,"score":0.5166945457458496},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.4719744622707367},{"id":"https://openalex.org/C61328038","wikidata":"https://www.wikidata.org/wiki/Q3358061","display_name":"Speech processing","level":2,"score":0.43500077724456787},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.37867358326911926},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.3207188844680786},{"id":"https://openalex.org/C163294075","wikidata":"https://www.wikidata.org/wiki/Q581861","display_name":"Noise reduction","level":2,"score":0.16266301274299622},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C111472728","wikidata":"https://www.wikidata.org/wiki/Q9471","display_name":"Epistemology","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/taslp.2023.3271151","is_oa":false,"landing_page_url":"https://doi.org/10.1109/taslp.2023.3271151","pdf_url":null,"source":{"id":"https://openalex.org/S4210169297","display_name":"IEEE/ACM Transactions on Audio Speech and Language Processing","issn_l":"2329-9290","issn":["2329-9290","2329-9304"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE/ACM Transactions on Audio, Speech, and Language Processing","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/16","display_name":"Peace, Justice and strong institutions","score":0.6700000166893005}],"awards":[{"id":"https://openalex.org/G4068114840","display_name":null,"funder_award_id":"62271314","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":65,"referenced_works":["https://openalex.org/W1482149378","https://openalex.org/W1604034532","https://openalex.org/W2044893557","https://openalex.org/W2051428568","https://openalex.org/W2078528584","https://openalex.org/W2094721231","https://openalex.org/W2141998673","https://openalex.org/W2146324387","https://openalex.org/W2291877678","https://openalex.org/W2516001803","https://openalex.org/W2519091744","https://openalex.org/W2603567530","https://openalex.org/W2884585870","https://openalex.org/W2889442120","https://openalex.org/W2891759647","https://openalex.org/W2930648092","https://openalex.org/W2936774411","https://openalex.org/W2949558265","https://openalex.org/W2952218014","https://openalex.org/W2962866211","https://openalex.org/W2962935966","https://openalex.org/W2963341071","https://openalex.org/W2964058413","https://openalex.org/W2964089206","https://openalex.org/W2970006822","https://openalex.org/W3000389243","https://openalex.org/W3015199127","https://openalex.org/W3015626488","https://openalex.org/W3023846453","https://openalex.org/W3092028330","https://openalex.org/W3092864146","https://openalex.org/W3096408984","https://openalex.org/W3096429957","https://openalex.org/W3097627357","https://openalex.org/W3097777922","https://openalex.org/W3097906045","https://openalex.org/W3097945073","https://openalex.org/W3099330747","https://openalex.org/W3113290170","https://openalex.org/W3158779859","https://openalex.org/W3160085755","https://openalex.org/W3161950572","https://openalex.org/W3162188526","https://openalex.org/W3163464523","https://openalex.org/W3197912330","https://openalex.org/W3198680319","https://openalex.org/W3206809722","https://openalex.org/W3213188934","https://openalex.org/W4221143458","https://openalex.org/W4221149546","https://openalex.org/W4221150474","https://openalex.org/W4224917453","https://openalex.org/W4224925070","https://openalex.org/W4224932531","https://openalex.org/W4225298533","https://openalex.org/W4225905067","https://openalex.org/W4232282348","https://openalex.org/W4253928870","https://openalex.org/W4296069327","https://openalex.org/W6753412334","https://openalex.org/W6762114000","https://openalex.org/W6767111847","https://openalex.org/W6771763809","https://openalex.org/W6783867762","https://openalex.org/W6785764544"],"related_works":["https://openalex.org/W1986772939","https://openalex.org/W2037635165","https://openalex.org/W2738829087","https://openalex.org/W2127461790","https://openalex.org/W2069324367","https://openalex.org/W1911859126","https://openalex.org/W2542062716","https://openalex.org/W1505346162","https://openalex.org/W4200562864","https://openalex.org/W3096184950"],"abstract_inverted_index":{"Speech":[0],"enhancement":[1,22,51,89],"refers":[2],"to":[3,8,72,135,166],"suppressing":[4],"the":[5,10,16,44,56,63,69,74,78,82,110,115,128,137,142,148,153,157,168,172,176,184,195,203,217],"background":[6],"noise":[7,75],"improve":[9],"perceptual":[11],"quality":[12],"and":[13,39,59,95,104,122,141,152,175,194],"intelligibility":[14],"of":[15,171,206],"observed":[17],"noisy":[18,154,185],"speech.":[19,79,208],"Recently,":[20],"speech":[21,50,64,88,116,144,178,192,198],"algorithms":[23,33],"based":[24,34],"on":[25,35,55,211],"deep":[26],"neural":[27,92,106,111],"networks":[28],"(DNNs)":[29],"have":[30,40],"replaced":[31],"traditional":[32],"statistical":[36],"signal":[37,117,151],"processing":[38],"become":[41],"mainstream":[42],"in":[43],"research":[45],"field.":[46],"However,":[47],"most":[48],"DNN-based":[49],"methods":[52],"commonly":[53],"operate":[54],"frequency":[57],"domain":[58],"do":[60],"not":[61],"use":[62,101],"production":[65],"model,":[66],"which":[67],"makes":[68],"models":[70],"prone":[71],"under-suppress":[73],"or":[76],"over-suppress":[77],"To":[80],"address":[81],"shortcoming,":[83],"we":[84,100],"propose":[85],"a":[86,102],"novel":[87],"method":[90,219],"integrating":[91],"homomorphic":[93,112],"synthesis":[94,113],"complex":[96,169],"spectral":[97,158],"masking.":[98],"Specifically,":[99],"shared-encoder":[103],"multi-decoder":[105],"network":[107],"architecture.":[108],"For":[109,156],"branch,":[114,160],"is":[118,145,164,180,199],"separated":[119],"into":[120],"excitation":[121],"vocal":[123],"tract":[124],"components":[125,139],"through":[126],"liftering":[127],"cepstrum,":[129],"two":[130,188,204,212],"DNN":[131,162],"decoders":[132],"are":[133],"applied":[134],"estimate":[136,167,191],"target":[138,173],"independently,":[140],"denoised":[143,177],"synthesized":[146],"by":[147,182,201],"estimated":[149,207],"minimum-phase":[150],"phase.":[155],"masking":[159,183],"another":[161],"decoder":[163],"adopted":[165],"mask":[170],"spectrum,":[174],"spectrum":[179],"obtained":[181,200],"spectrum.":[186],"The":[187],"branches":[189,205],"respectively":[190],"signals,":[193],"final":[196],"enhanced":[197],"merging":[202],"Experimental":[209],"results":[210],"popular":[213],"datasets":[214],"show":[215],"that":[216],"proposed":[218],"achieves":[220],"state-of-the-art":[221],"level":[222],"performance,":[223],"with":[224],"only":[225],"920":[226],"K":[227],"model":[228],"parameters.":[229]},"counts_by_year":[{"year":2025,"cited_by_count":6},{"year":2024,"cited_by_count":7}],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2025-10-10T00:00:00"}
