{"id":"https://openalex.org/W3164605550","doi":"https://doi.org/10.1109/taslp.2021.3082282","title":"Towards Model Compression for Deep Learning Based Speech Enhancement","display_name":"Towards Model Compression for Deep Learning Based Speech Enhancement","publication_year":2021,"publication_date":"2021-01-01","ids":{"openalex":"https://openalex.org/W3164605550","doi":"https://doi.org/10.1109/taslp.2021.3082282","mag":"3164605550","pmid":"https://pubmed.ncbi.nlm.nih.gov/34179220"},"language":"en","primary_location":{"id":"doi:10.1109/taslp.2021.3082282","is_oa":false,"landing_page_url":"https://doi.org/10.1109/taslp.2021.3082282","pdf_url":null,"source":{"id":"https://openalex.org/S4210169297","display_name":"IEEE/ACM Transactions on Audio Speech and Language Processing","issn_l":"2329-9290","issn":["2329-9290","2329-9304"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE/ACM Transactions on Audio, Speech, and Language Processing","raw_type":"journal-article"},"type":"article","indexed_in":["crossref","pubmed"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://www.ncbi.nlm.nih.gov/pmc/articles/8224477","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5058209019","display_name":"Ke Tan","orcid":"https://orcid.org/0000-0001-5073-8060"},"institutions":[{"id":"https://openalex.org/I52357470","display_name":"The Ohio State University","ror":"https://ror.org/00rs6vg23","country_code":"US","type":"education","lineage":["https://openalex.org/I52357470"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Ke Tan","raw_affiliation_strings":["Department of Computer Science and Engineering, The Ohio State University, Columbus, OH, 43210-1277 USA","Department of Computer Science and Engineering, The Ohio State University, Columbus, OH, USA"],"affiliations":[{"raw_affiliation_string":"Department of Computer Science and Engineering, The Ohio State University, Columbus, OH, 43210-1277 USA","institution_ids":["https://openalex.org/I52357470"]},{"raw_affiliation_string":"Department of Computer Science and Engineering, The Ohio State University, Columbus, OH, USA","institution_ids":["https://openalex.org/I52357470"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5051837453","display_name":"DeLiang Wang","orcid":"https://orcid.org/0000-0001-8195-6319"},"institutions":[{"id":"https://openalex.org/I52357470","display_name":"The Ohio State University","ror":"https://ror.org/00rs6vg23","country_code":"US","type":"education","lineage":["https://openalex.org/I52357470"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"DeLiang Wang","raw_affiliation_strings":["Department of Computer Science and Engineering and the Center for Cognitive and Brain Sciences, The Ohio State University, Columbus, OH 43210-1277, USA","Department of Computer Science and Engineering, and the Center for Cognitive, and Brain Sciences, The Ohio State University, Columbus, OH, USA"],"affiliations":[{"raw_affiliation_string":"Department of Computer Science and Engineering and the Center for Cognitive and Brain Sciences, The Ohio State University, Columbus, OH 43210-1277, USA","institution_ids":["https://openalex.org/I52357470"]},{"raw_affiliation_string":"Department of Computer Science and Engineering, and the Center for Cognitive, and Brain Sciences, The Ohio State University, Columbus, OH, USA","institution_ids":["https://openalex.org/I52357470"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5058209019"],"corresponding_institution_ids":["https://openalex.org/I52357470"],"apc_list":null,"apc_paid":null,"fwci":9.7217,"has_fulltext":false,"cited_by_count":80,"citation_normalized_percentile":{"value":0.98788731,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":98,"max":100},"biblio":{"volume":"29","issue":null,"first_page":"1785","last_page":"1794"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9983000159263611,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11233","display_name":"Advanced Adaptive Filtering Techniques","score":0.9952999949455261,"subfield":{"id":"https://openalex.org/subfields/2206","display_name":"Computational Mechanics"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8186770677566528},{"id":"https://openalex.org/keywords/speech-enhancement","display_name":"Speech enhancement","score":0.7803337574005127},{"id":"https://openalex.org/keywords/deep-neural-networks","display_name":"Deep neural networks","score":0.5342430472373962},{"id":"https://openalex.org/keywords/regularization","display_name":"Regularization (linguistics)","score":0.5065439939498901},{"id":"https://openalex.org/keywords/artificial-neural-network","display_name":"Artificial neural network","score":0.4726245403289795},{"id":"https://openalex.org/keywords/computation","display_name":"Computation","score":0.46897539496421814},{"id":"https://openalex.org/keywords/deep-learning","display_name":"Deep learning","score":0.4595325291156769},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.45758548378944397},{"id":"https://openalex.org/keywords/speech-coding","display_name":"Speech coding","score":0.4478147029876709},{"id":"https://openalex.org/keywords/performance-enhancement","display_name":"Performance enhancement","score":0.4412262439727783},{"id":"https://openalex.org/keywords/quantization","display_name":"Quantization (signal processing)","score":0.4400906562805176},{"id":"https://openalex.org/keywords/cluster-analysis","display_name":"Cluster analysis","score":0.42803844809532166},{"id":"https://openalex.org/keywords/latency","display_name":"Latency (audio)","score":0.4145847260951996},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.4066716730594635},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.16272887587547302},{"id":"https://openalex.org/keywords/noise-reduction","display_name":"Noise reduction","score":0.07972228527069092}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8186770677566528},{"id":"https://openalex.org/C2776182073","wikidata":"https://www.wikidata.org/wiki/Q7575395","display_name":"Speech enhancement","level":3,"score":0.7803337574005127},{"id":"https://openalex.org/C2984842247","wikidata":"https://www.wikidata.org/wiki/Q197536","display_name":"Deep neural networks","level":3,"score":0.5342430472373962},{"id":"https://openalex.org/C2776135515","wikidata":"https://www.wikidata.org/wiki/Q17143721","display_name":"Regularization (linguistics)","level":2,"score":0.5065439939498901},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.4726245403289795},{"id":"https://openalex.org/C45374587","wikidata":"https://www.wikidata.org/wiki/Q12525525","display_name":"Computation","level":2,"score":0.46897539496421814},{"id":"https://openalex.org/C108583219","wikidata":"https://www.wikidata.org/wiki/Q197536","display_name":"Deep learning","level":2,"score":0.4595325291156769},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.45758548378944397},{"id":"https://openalex.org/C13895895","wikidata":"https://www.wikidata.org/wiki/Q3270773","display_name":"Speech coding","level":2,"score":0.4478147029876709},{"id":"https://openalex.org/C2986800882","wikidata":"https://www.wikidata.org/wiki/Q7168187","display_name":"Performance enhancement","level":2,"score":0.4412262439727783},{"id":"https://openalex.org/C28855332","wikidata":"https://www.wikidata.org/wiki/Q198099","display_name":"Quantization (signal processing)","level":2,"score":0.4400906562805176},{"id":"https://openalex.org/C73555534","wikidata":"https://www.wikidata.org/wiki/Q622825","display_name":"Cluster analysis","level":2,"score":0.42803844809532166},{"id":"https://openalex.org/C82876162","wikidata":"https://www.wikidata.org/wiki/Q17096504","display_name":"Latency (audio)","level":2,"score":0.4145847260951996},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4066716730594635},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.16272887587547302},{"id":"https://openalex.org/C163294075","wikidata":"https://www.wikidata.org/wiki/Q581861","display_name":"Noise reduction","level":2,"score":0.07972228527069092},{"id":"https://openalex.org/C76155785","wikidata":"https://www.wikidata.org/wiki/Q418","display_name":"Telecommunications","level":1,"score":0.0},{"id":"https://openalex.org/C99508421","wikidata":"https://www.wikidata.org/wiki/Q2678675","display_name":"Physical medicine and rehabilitation","level":1,"score":0.0},{"id":"https://openalex.org/C71924100","wikidata":"https://www.wikidata.org/wiki/Q11190","display_name":"Medicine","level":0,"score":0.0}],"mesh":[],"locations_count":3,"locations":[{"id":"doi:10.1109/taslp.2021.3082282","is_oa":false,"landing_page_url":"https://doi.org/10.1109/taslp.2021.3082282","pdf_url":null,"source":{"id":"https://openalex.org/S4210169297","display_name":"IEEE/ACM Transactions on Audio Speech and Language Processing","issn_l":"2329-9290","issn":["2329-9290","2329-9304"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE/ACM Transactions on Audio, Speech, and Language Processing","raw_type":"journal-article"},{"id":"pmid:34179220","is_oa":false,"landing_page_url":"https://pubmed.ncbi.nlm.nih.gov/34179220","pdf_url":null,"source":{"id":"https://openalex.org/S4306525036","display_name":"PubMed","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1299303238","host_organization_name":"National Institutes of Health","host_organization_lineage":["https://openalex.org/I1299303238"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE/ACM transactions on audio, speech, and language processing","raw_type":null},{"id":"pmh:oai:pubmedcentral.nih.gov:8224477","is_oa":true,"landing_page_url":"https://www.ncbi.nlm.nih.gov/pmc/articles/8224477","pdf_url":null,"source":{"id":"https://openalex.org/S2764455111","display_name":"PubMed Central","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1299303238","host_organization_name":"National Institutes of Health","host_organization_lineage":["https://openalex.org/I1299303238"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"IEEE/ACM Trans Audio Speech Lang Process","raw_type":"Text"}],"best_oa_location":{"id":"pmh:oai:pubmedcentral.nih.gov:8224477","is_oa":true,"landing_page_url":"https://www.ncbi.nlm.nih.gov/pmc/articles/8224477","pdf_url":null,"source":{"id":"https://openalex.org/S2764455111","display_name":"PubMed Central","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1299303238","host_organization_name":"National Institutes of Health","host_organization_lineage":["https://openalex.org/I1299303238"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"IEEE/ACM Trans Audio Speech Lang Process","raw_type":"Text"},"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G2910152985","display_name":null,"funder_award_id":"R01 DC012048","funder_id":"https://openalex.org/F4320337352","funder_display_name":"National Institute on Deafness and Other Communication Disorders"}],"funders":[{"id":"https://openalex.org/F4320317189","display_name":"Ohio Supercomputer Center","ror":"https://ror.org/01apna436"},{"id":"https://openalex.org/F4320337352","display_name":"National Institute on Deafness and Other Communication Disorders","ror":"https://ror.org/04mhx6838"}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":74,"referenced_works":["https://openalex.org/W160800111","https://openalex.org/W1552314771","https://openalex.org/W1690739335","https://openalex.org/W1724438581","https://openalex.org/W1821462560","https://openalex.org/W1974387177","https://openalex.org/W1983108229","https://openalex.org/W1987371344","https://openalex.org/W2069681747","https://openalex.org/W2113328646","https://openalex.org/W2114766824","https://openalex.org/W2119144962","https://openalex.org/W2119913432","https://openalex.org/W2124509324","https://openalex.org/W2125389748","https://openalex.org/W2127851351","https://openalex.org/W2134797427","https://openalex.org/W2141998673","https://openalex.org/W2145085734","https://openalex.org/W2156387975","https://openalex.org/W2167215970","https://openalex.org/W2221409856","https://openalex.org/W2279098554","https://openalex.org/W2460144244","https://openalex.org/W2508418541","https://openalex.org/W2516001803","https://openalex.org/W2552071709","https://openalex.org/W2561557072","https://openalex.org/W2612445135","https://openalex.org/W2734774145","https://openalex.org/W2736953746","https://openalex.org/W2752037867","https://openalex.org/W2785523195","https://openalex.org/W2809624076","https://openalex.org/W2937484199","https://openalex.org/W2948116732","https://openalex.org/W2950248853","https://openalex.org/W2952218014","https://openalex.org/W2962866211","https://openalex.org/W2963122961","https://openalex.org/W2963125010","https://openalex.org/W2963145730","https://openalex.org/W2963363373","https://openalex.org/W2963864497","https://openalex.org/W2964118293","https://openalex.org/W2964233199","https://openalex.org/W2964299589","https://openalex.org/W2965862774","https://openalex.org/W2972346666","https://openalex.org/W2972460025","https://openalex.org/W2991361823","https://openalex.org/W2998784869","https://openalex.org/W3012561096","https://openalex.org/W3028019732","https://openalex.org/W3099330747","https://openalex.org/W3103034221","https://openalex.org/W3163105696","https://openalex.org/W4297775537","https://openalex.org/W6637551013","https://openalex.org/W6637709462","https://openalex.org/W6638523607","https://openalex.org/W6676664377","https://openalex.org/W6677103964","https://openalex.org/W6677580257","https://openalex.org/W6678583879","https://openalex.org/W6679909955","https://openalex.org/W6682889407","https://openalex.org/W6684563725","https://openalex.org/W6695314431","https://openalex.org/W6737664043","https://openalex.org/W6743912273","https://openalex.org/W6747620207","https://openalex.org/W6753069482","https://openalex.org/W6795274757"],"related_works":["https://openalex.org/W1964985140","https://openalex.org/W1630865680","https://openalex.org/W2373767407","https://openalex.org/W2124672476","https://openalex.org/W3211091508","https://openalex.org/W3168109306","https://openalex.org/W2136586452","https://openalex.org/W68492818","https://openalex.org/W4220659530","https://openalex.org/W3164605550"],"abstract_inverted_index":{"The":[0],"use":[1],"of":[2,12,108,139],"deep":[3],"neural":[4],"networks":[5],"(DNNs)":[6],"has":[7],"dramatically":[8],"elevated":[9],"the":[10,16,69,95,106,126,137,140],"performance":[11,24],"speech":[13,43,74,144],"enhancement":[14,23,44,119],"over":[15],"last":[17],"decade.":[18],"However,":[19],"to":[20,40,67],"achieve":[21],"strong":[22],"typically":[25],"requires":[26],"a":[27],"large":[28,113],"DNN,":[29],"which":[30,76,134],"is":[31],"both":[32],"memory":[33],"and":[34,85,93],"computation":[35],"consuming,":[36],"making":[37],"it":[38],"difficult":[39],"deploy":[41],"such":[42],"systems":[45],"on":[46,131],"devices":[47],"with":[48,55],"limited":[49],"hardware":[50],"resources":[51],"or":[52],"in":[53],"applications":[54],"strict":[56],"latency":[57],"requirements.":[58],"In":[59,121],"this":[60],"study,":[61],"we":[62,123],"propose":[63],"two":[64],"compression":[65,97],"pipelines":[66],"reduce":[68],"model":[70],"size":[71],"for":[72,142],"DNN-based":[73],"enhancement,":[75],"incorporates":[77],"three":[78],"different":[79,110],"techniques:":[80],"sparse":[81],"regularization,":[82],"iterative":[83],"pruning":[84],"clustering-based":[86],"quantization.":[87],"We":[88],"systematically":[89],"investigate":[90],"these":[91],"techniques":[92],"evaluate":[94],"proposed":[96,127],"pipelines.":[98],"Experimental":[99],"results":[100],"demonstrate":[101],"that":[102,125],"our":[103],"approach":[104,128,141],"reduces":[105],"sizes":[107],"four":[109],"models":[111],"by":[112],"margins":[114],"without":[115],"significantly":[116],"sacrificing":[117],"their":[118],"performance.":[120],"addition,":[122],"find":[124],"performs":[129],"well":[130],"speaker":[132],"separation,":[133],"further":[135],"demonstrates":[136],"effectiveness":[138],"compressing":[143],"separation":[145],"models.":[146]},"counts_by_year":[{"year":2026,"cited_by_count":4},{"year":2025,"cited_by_count":13},{"year":2024,"cited_by_count":22},{"year":2023,"cited_by_count":20},{"year":2022,"cited_by_count":13},{"year":2021,"cited_by_count":8}],"updated_date":"2026-04-03T22:45:19.894376","created_date":"2025-10-10T00:00:00"}
