{"id":"https://openalex.org/W4372263414","doi":"https://doi.org/10.1109/icassp49357.2023.10096178","title":"Speech Enhancement with Intelligent Neural Homomorphic Synthesis","display_name":"Speech Enhancement with Intelligent Neural Homomorphic Synthesis","publication_year":2023,"publication_date":"2023-05-05","ids":{"openalex":"https://openalex.org/W4372263414","doi":"https://doi.org/10.1109/icassp49357.2023.10096178"},"language":"en","primary_location":{"id":"doi:10.1109/icassp49357.2023.10096178","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp49357.2023.10096178","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2023 - 2023 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5103195569","display_name":"Shulin He","orcid":"https://orcid.org/0009-0002-0382-3515"},"institutions":[{"id":"https://openalex.org/I2250653659","display_name":"Tencent (China)","ror":"https://ror.org/00hhjss72","country_code":"CN","type":"company","lineage":["https://openalex.org/I2250653659"]},{"id":"https://openalex.org/I2722730","display_name":"Inner Mongolia University","ror":"https://ror.org/0106qb496","country_code":"CN","type":"education","lineage":["https://openalex.org/I2722730"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Shulin He","raw_affiliation_strings":["Inner Mongolia University,College of Computer Science,China","Tencent Ethereal Audio Lab, Tencent Corporation, Shenzhen, China","College of Computer Science, Inner Mongolia University, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Inner Mongolia University,College of Computer Science,China","institution_ids":["https://openalex.org/I2722730"]},{"raw_affiliation_string":"Tencent Ethereal Audio Lab, Tencent Corporation, Shenzhen, China","institution_ids":["https://openalex.org/I2250653659"]},{"raw_affiliation_string":"College of Computer Science, Inner Mongolia University, China","institution_ids":["https://openalex.org/I2722730"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5072851763","display_name":"Wei Rao","orcid":null},"institutions":[{"id":"https://openalex.org/I2250653659","display_name":"Tencent (China)","ror":"https://ror.org/00hhjss72","country_code":"CN","type":"company","lineage":["https://openalex.org/I2250653659"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Wei Rao","raw_affiliation_strings":["Tencent Corporation,Tencent Ethereal Audio Lab,Shenzhen,China","Tencent Ethereal Audio Lab, Tencent Corporation, Shenzhen, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Tencent Corporation,Tencent Ethereal Audio Lab,Shenzhen,China","institution_ids":["https://openalex.org/I2250653659"]},{"raw_affiliation_string":"Tencent Ethereal Audio Lab, Tencent Corporation, Shenzhen, China","institution_ids":["https://openalex.org/I2250653659"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5063136754","display_name":"Jinjiang Liu","orcid":"https://orcid.org/0000-0002-1543-9193"},"institutions":[{"id":"https://openalex.org/I2722730","display_name":"Inner Mongolia University","ror":"https://ror.org/0106qb496","country_code":"CN","type":"education","lineage":["https://openalex.org/I2722730"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jinjiang Liu","raw_affiliation_strings":["Inner Mongolia University,College of Computer Science,China","College of Computer Science, Inner Mongolia University, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Inner Mongolia University,College of Computer Science,China","institution_ids":["https://openalex.org/I2722730"]},{"raw_affiliation_string":"College of Computer Science, Inner Mongolia University, China","institution_ids":["https://openalex.org/I2722730"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100450139","display_name":"Jun Chen","orcid":"https://orcid.org/0000-0001-7201-1989"},"institutions":[{"id":"https://openalex.org/I2250653659","display_name":"Tencent (China)","ror":"https://ror.org/00hhjss72","country_code":"CN","type":"company","lineage":["https://openalex.org/I2250653659"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jun Chen","raw_affiliation_strings":["Tencent Corporation,Tencent Ethereal Audio Lab,Shenzhen,China","Tencent Ethereal Audio Lab, Tencent Corporation, Shenzhen, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Tencent Corporation,Tencent Ethereal Audio Lab,Shenzhen,China","institution_ids":["https://openalex.org/I2250653659"]},{"raw_affiliation_string":"Tencent Ethereal Audio Lab, Tencent Corporation, Shenzhen, China","institution_ids":["https://openalex.org/I2250653659"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5015448985","display_name":"Yukai Ju","orcid":null},"institutions":[{"id":"https://openalex.org/I2250653659","display_name":"Tencent (China)","ror":"https://ror.org/00hhjss72","country_code":"CN","type":"company","lineage":["https://openalex.org/I2250653659"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yukai Ju","raw_affiliation_strings":["Tencent Corporation,Tencent Ethereal Audio Lab,Shenzhen,China","Tencent Ethereal Audio Lab, Tencent Corporation, Shenzhen, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Tencent Corporation,Tencent Ethereal Audio Lab,Shenzhen,China","institution_ids":["https://openalex.org/I2250653659"]},{"raw_affiliation_string":"Tencent Ethereal Audio Lab, Tencent Corporation, Shenzhen, China","institution_ids":["https://openalex.org/I2250653659"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100693230","display_name":"Xueliang Zhang","orcid":"https://orcid.org/0000-0002-0406-1105"},"institutions":[{"id":"https://openalex.org/I2722730","display_name":"Inner Mongolia University","ror":"https://ror.org/0106qb496","country_code":"CN","type":"education","lineage":["https://openalex.org/I2722730"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xueliang Zhang","raw_affiliation_strings":["Inner Mongolia University,College of Computer Science,China","College of Computer Science, Inner Mongolia University, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Inner Mongolia University,College of Computer Science,China","institution_ids":["https://openalex.org/I2722730"]},{"raw_affiliation_string":"College of Computer Science, Inner Mongolia University, China","institution_ids":["https://openalex.org/I2722730"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5084128157","display_name":"Yannan Wang","orcid":"https://orcid.org/0000-0001-7248-4954"},"institutions":[{"id":"https://openalex.org/I2250653659","display_name":"Tencent (China)","ror":"https://ror.org/00hhjss72","country_code":"CN","type":"company","lineage":["https://openalex.org/I2250653659"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yannan Wang","raw_affiliation_strings":["Tencent Corporation,Tencent Ethereal Audio Lab,Shenzhen,China","Tencent Ethereal Audio Lab, Tencent Corporation, Shenzhen, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Tencent Corporation,Tencent Ethereal Audio Lab,Shenzhen,China","institution_ids":["https://openalex.org/I2250653659"]},{"raw_affiliation_string":"Tencent Ethereal Audio Lab, Tencent Corporation, Shenzhen, China","institution_ids":["https://openalex.org/I2250653659"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5078353046","display_name":"Shidong Shang","orcid":null},"institutions":[{"id":"https://openalex.org/I2250653659","display_name":"Tencent (China)","ror":"https://ror.org/00hhjss72","country_code":"CN","type":"company","lineage":["https://openalex.org/I2250653659"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Shidong Shang","raw_affiliation_strings":["Tencent Corporation,Tencent Ethereal Audio Lab,Shenzhen,China","Tencent Ethereal Audio Lab, Tencent Corporation, Shenzhen, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Tencent Corporation,Tencent Ethereal Audio Lab,Shenzhen,China","institution_ids":["https://openalex.org/I2250653659"]},{"raw_affiliation_string":"Tencent Ethereal Audio Lab, Tencent Corporation, Shenzhen, China","institution_ids":["https://openalex.org/I2250653659"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":8,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.739,"has_fulltext":false,"cited_by_count":4,"citation_normalized_percentile":{"value":0.68193636,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":96},"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"5"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9977999925613403,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11233","display_name":"Advanced Adaptive Filtering Techniques","score":0.9961000084877014,"subfield":{"id":"https://openalex.org/subfields/2206","display_name":"Computational Mechanics"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/vocal-tract","display_name":"Vocal tract","score":0.8591997623443604},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7396569848060608},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.7299032807350159},{"id":"https://openalex.org/keywords/cepstrum","display_name":"Cepstrum","score":0.6688039898872375},{"id":"https://openalex.org/keywords/speech-enhancement","display_name":"Speech enhancement","score":0.6549115180969238},{"id":"https://openalex.org/keywords/artificial-neural-network","display_name":"Artificial neural network","score":0.5617778897285461},{"id":"https://openalex.org/keywords/speech-processing","display_name":"Speech processing","score":0.553317666053772},{"id":"https://openalex.org/keywords/convolutional-neural-network","display_name":"Convolutional neural network","score":0.5452223420143127},{"id":"https://openalex.org/keywords/speech-synthesis","display_name":"Speech synthesis","score":0.5038885474205017},{"id":"https://openalex.org/keywords/mel-frequency-cepstrum","display_name":"Mel-frequency cepstrum","score":0.4843517541885376},{"id":"https://openalex.org/keywords/filter","display_name":"Filter (signal processing)","score":0.4780034124851227},{"id":"https://openalex.org/keywords/speech-coding","display_name":"Speech coding","score":0.4648057222366333},{"id":"https://openalex.org/keywords/signal","display_name":"SIGNAL (programming language)","score":0.46282774209976196},{"id":"https://openalex.org/keywords/linear-predictive-coding","display_name":"Linear predictive coding","score":0.45533010363578796},{"id":"https://openalex.org/keywords/waveform","display_name":"Waveform","score":0.4544657766819},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.44285932183265686},{"id":"https://openalex.org/keywords/signal-processing","display_name":"Signal processing","score":0.4335249364376068},{"id":"https://openalex.org/keywords/voice-activity-detection","display_name":"Voice activity detection","score":0.4222080111503601},{"id":"https://openalex.org/keywords/speech-production","display_name":"Speech production","score":0.41887032985687256},{"id":"https://openalex.org/keywords/digital-signal-processing","display_name":"Digital signal processing","score":0.332461953163147},{"id":"https://openalex.org/keywords/feature-extraction","display_name":"Feature extraction","score":0.1658974289894104},{"id":"https://openalex.org/keywords/noise-reduction","display_name":"Noise reduction","score":0.09166198968887329},{"id":"https://openalex.org/keywords/telecommunications","display_name":"Telecommunications","score":0.07308009266853333}],"concepts":[{"id":"https://openalex.org/C47401133","wikidata":"https://www.wikidata.org/wiki/Q748953","display_name":"Vocal tract","level":2,"score":0.8591997623443604},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7396569848060608},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.7299032807350159},{"id":"https://openalex.org/C88485024","wikidata":"https://www.wikidata.org/wiki/Q1054571","display_name":"Cepstrum","level":2,"score":0.6688039898872375},{"id":"https://openalex.org/C2776182073","wikidata":"https://www.wikidata.org/wiki/Q7575395","display_name":"Speech enhancement","level":3,"score":0.6549115180969238},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.5617778897285461},{"id":"https://openalex.org/C61328038","wikidata":"https://www.wikidata.org/wiki/Q3358061","display_name":"Speech processing","level":2,"score":0.553317666053772},{"id":"https://openalex.org/C81363708","wikidata":"https://www.wikidata.org/wiki/Q17084460","display_name":"Convolutional neural network","level":2,"score":0.5452223420143127},{"id":"https://openalex.org/C14999030","wikidata":"https://www.wikidata.org/wiki/Q16346","display_name":"Speech synthesis","level":2,"score":0.5038885474205017},{"id":"https://openalex.org/C151989614","wikidata":"https://www.wikidata.org/wiki/Q440370","display_name":"Mel-frequency cepstrum","level":3,"score":0.4843517541885376},{"id":"https://openalex.org/C106131492","wikidata":"https://www.wikidata.org/wiki/Q3072260","display_name":"Filter (signal processing)","level":2,"score":0.4780034124851227},{"id":"https://openalex.org/C13895895","wikidata":"https://www.wikidata.org/wiki/Q3270773","display_name":"Speech coding","level":2,"score":0.4648057222366333},{"id":"https://openalex.org/C2779843651","wikidata":"https://www.wikidata.org/wiki/Q7390335","display_name":"SIGNAL (programming language)","level":2,"score":0.46282774209976196},{"id":"https://openalex.org/C59883199","wikidata":"https://www.wikidata.org/wiki/Q1826438","display_name":"Linear predictive coding","level":3,"score":0.45533010363578796},{"id":"https://openalex.org/C197424946","wikidata":"https://www.wikidata.org/wiki/Q1165717","display_name":"Waveform","level":3,"score":0.4544657766819},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.44285932183265686},{"id":"https://openalex.org/C104267543","wikidata":"https://www.wikidata.org/wiki/Q208163","display_name":"Signal processing","level":3,"score":0.4335249364376068},{"id":"https://openalex.org/C204201278","wikidata":"https://www.wikidata.org/wiki/Q1332614","display_name":"Voice activity detection","level":3,"score":0.4222080111503601},{"id":"https://openalex.org/C43617652","wikidata":"https://www.wikidata.org/wiki/Q7575399","display_name":"Speech production","level":2,"score":0.41887032985687256},{"id":"https://openalex.org/C84462506","wikidata":"https://www.wikidata.org/wiki/Q173142","display_name":"Digital signal processing","level":2,"score":0.332461953163147},{"id":"https://openalex.org/C52622490","wikidata":"https://www.wikidata.org/wiki/Q1026626","display_name":"Feature extraction","level":2,"score":0.1658974289894104},{"id":"https://openalex.org/C163294075","wikidata":"https://www.wikidata.org/wiki/Q581861","display_name":"Noise reduction","level":2,"score":0.09166198968887329},{"id":"https://openalex.org/C76155785","wikidata":"https://www.wikidata.org/wiki/Q418","display_name":"Telecommunications","level":1,"score":0.07308009266853333},{"id":"https://openalex.org/C554190296","wikidata":"https://www.wikidata.org/wiki/Q47528","display_name":"Radar","level":2,"score":0.0},{"id":"https://openalex.org/C9390403","wikidata":"https://www.wikidata.org/wiki/Q3966","display_name":"Computer hardware","level":1,"score":0.0},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.0},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/icassp49357.2023.10096178","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp49357.2023.10096178","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2023 - 2023 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"display_name":"Peace, Justice and strong institutions","id":"https://metadata.un.org/sdg/16","score":0.7400000095367432}],"awards":[],"funders":[{"id":"https://openalex.org/F4320330944","display_name":"Nature","ror":null}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":30,"referenced_works":["https://openalex.org/W1536373746","https://openalex.org/W1604034532","https://openalex.org/W1968939597","https://openalex.org/W2128653836","https://openalex.org/W2168379380","https://openalex.org/W2696967604","https://openalex.org/W2889442120","https://openalex.org/W2937484199","https://openalex.org/W2952218014","https://openalex.org/W2962866211","https://openalex.org/W3015199127","https://openalex.org/W3023846453","https://openalex.org/W3095048608","https://openalex.org/W3096408984","https://openalex.org/W3096429957","https://openalex.org/W3097034112","https://openalex.org/W3099330747","https://openalex.org/W3147539069","https://openalex.org/W3160085755","https://openalex.org/W3162493033","https://openalex.org/W3162501355","https://openalex.org/W3162534564","https://openalex.org/W3165776651","https://openalex.org/W3198156015","https://openalex.org/W3198680319","https://openalex.org/W3205004157","https://openalex.org/W4224925070","https://openalex.org/W4224931274","https://openalex.org/W4283021642","https://openalex.org/W4285119904"],"related_works":["https://openalex.org/W1482212662","https://openalex.org/W2100012411","https://openalex.org/W1997579527","https://openalex.org/W2394579548","https://openalex.org/W2165083446","https://openalex.org/W2032268913","https://openalex.org/W2111212790","https://openalex.org/W2499712276","https://openalex.org/W34544436","https://openalex.org/W3211091508"],"abstract_inverted_index":{"Most":[0],"neural":[1,25],"network":[2,28,58,75],"speech":[3,7,30],"enhancement":[4],"models":[5,10],"ignore":[6],"production":[8],"mathematical":[9],"by":[11,113],"directly":[12],"mapping":[13],"Fourier":[14],"transform":[15],"spectrums":[16],"or":[17],"waveforms.":[18],"In":[19],"this":[20],"work,":[21],"we":[22,33,53],"propose":[23],"a":[24],"source":[26],"filter":[27],"for":[29],"enhancement.":[31],"Specifically,":[32],"use":[34,54],"homomorphic":[35],"signal":[36,51],"processing":[37],"and":[38,46,84,100],"cepstral":[39],"analysis":[40],"to":[41,64,80,116],"obtain":[42],"noisy":[43],"speech\u2019s":[44],"excitation":[45,83,99],"vocal":[47,85],"tract.":[48],"Unlike":[49],"traditional":[50],"processing,":[52],"an":[55],"attentive":[56,73],"recurrent":[57,74],"(ARN)":[59],"model":[60],"predicted":[61],"ratio":[62],"mask":[63],"replace":[65],"the":[66,82,97],"liftering":[67],"separation":[68],"function.":[69],"Then":[70],"two":[71],"convolutional":[72],"(CARN)":[76],"networks":[77],"are":[78],"used":[79],"predict":[81],"tract":[86],"of":[87],"clean":[88],"speech,":[89],"respectively.":[90],"The":[91],"system\u2019s":[92],"output":[93],"is":[94],"synthesized":[95],"from":[96],"estimated":[98],"vocal.":[101],"Experiments":[102],"prove":[103],"that":[104],"our":[105],"proposed":[106],"method":[107],"performs":[108],"better,":[109],"with":[110],"SI-SNR":[111],"improving":[112],"1.363dB":[114],"compared":[115],"FullSubNet.":[117]},"counts_by_year":[{"year":2025,"cited_by_count":2},{"year":2024,"cited_by_count":1},{"year":2023,"cited_by_count":1}],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2025-10-10T00:00:00"}
