{"id":"https://openalex.org/W2789131247","doi":"https://doi.org/10.1109/icassp.2018.8462155","title":"Convolutional-Recurrent Neural Networks for Speech Enhancement","display_name":"Convolutional-Recurrent Neural Networks for Speech Enhancement","publication_year":2018,"publication_date":"2018-04-01","ids":{"openalex":"https://openalex.org/W2789131247","doi":"https://doi.org/10.1109/icassp.2018.8462155","mag":"2789131247"},"language":"en","primary_location":{"id":"doi:10.1109/icassp.2018.8462155","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp.2018.8462155","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2018 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"},"type":"preprint","indexed_in":["arxiv","crossref","datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/1805.00579","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5100699154","display_name":"Han Zhao","orcid":"https://orcid.org/0000-0001-6770-5396"},"institutions":[{"id":"https://openalex.org/I4210164937","display_name":"Microsoft Research (United Kingdom)","ror":"https://ror.org/05k87vq12","country_code":"GB","type":"company","lineage":["https://openalex.org/I1290206253","https://openalex.org/I4210164937"]},{"id":"https://openalex.org/I74973139","display_name":"Carnegie Mellon University","ror":"https://ror.org/05x2bcf33","country_code":"US","type":"education","lineage":["https://openalex.org/I74973139"]}],"countries":["GB","US"],"is_corresponding":false,"raw_author_name":"Han Zhao","raw_affiliation_strings":["Microsoft Research","[Machine Learning Department, Carnegie Mellon University, Pittsburgh, PA USA]"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Microsoft Research","institution_ids":["https://openalex.org/I4210164937"]},{"raw_affiliation_string":"[Machine Learning Department, Carnegie Mellon University, Pittsburgh, PA USA]","institution_ids":["https://openalex.org/I74973139"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5084618329","display_name":"Shuayb Zarar","orcid":null},"institutions":[{"id":"https://openalex.org/I1290206253","display_name":"Microsoft (United States)","ror":"https://ror.org/00d0nc645","country_code":"US","type":"company","lineage":["https://openalex.org/I1290206253"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Shuayb Zarar","raw_affiliation_strings":["Microsoft Research, Redmond, WA, USA","[Microsoft Research, One Microsoft Way, Redmond, WA, USA]"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Microsoft Research, Redmond, WA, USA","institution_ids":["https://openalex.org/I1290206253"]},{"raw_affiliation_string":"[Microsoft Research, One Microsoft Way, Redmond, WA, USA]","institution_ids":["https://openalex.org/I1290206253"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5007425970","display_name":"Ivan Tashev","orcid":"https://orcid.org/0000-0002-2263-2047"},"institutions":[{"id":"https://openalex.org/I1290206253","display_name":"Microsoft (United States)","ror":"https://ror.org/00d0nc645","country_code":"US","type":"company","lineage":["https://openalex.org/I1290206253"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Ivan Tashev","raw_affiliation_strings":["Microsoft Research, Redmond, WA, USA","[Microsoft Research, One Microsoft Way, Redmond, WA, USA]"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Microsoft Research, Redmond, WA, USA","institution_ids":["https://openalex.org/I1290206253"]},{"raw_affiliation_string":"[Microsoft Research, One Microsoft Way, Redmond, WA, USA]","institution_ids":["https://openalex.org/I1290206253"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5066868860","display_name":"Chin\u2010Hui Lee","orcid":"https://orcid.org/0000-0002-1892-2551"},"institutions":[{"id":"https://openalex.org/I130701444","display_name":"Georgia Institute of Technology","ror":"https://ror.org/01zkghx44","country_code":"US","type":"education","lineage":["https://openalex.org/I130701444"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Chin-Hui Lee","raw_affiliation_strings":["School of Electrical and Computer Engineering, Georgia Institute of Technology, Atlanta, GA, USA","<org_name>School of Electrical and Computer Engineering, Georgia Institute of Technology, Atlanta, GA, USA</org_name>"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"School of Electrical and Computer Engineering, Georgia Institute of Technology, Atlanta, GA, USA","institution_ids":["https://openalex.org/I130701444"]},{"raw_affiliation_string":"<org_name>School of Electrical and Computer Engineering, Georgia Institute of Technology, Atlanta, GA, USA</org_name>","institution_ids":["https://openalex.org/I130701444"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":4,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.8314,"has_fulltext":true,"cited_by_count":13,"citation_normalized_percentile":{"value":0.72410871,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":97},"biblio":{"volume":null,"issue":null,"first_page":"2401","last_page":"2405"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9976999759674072,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11233","display_name":"Advanced Adaptive Filtering Techniques","score":0.9966999888420105,"subfield":{"id":"https://openalex.org/subfields/2206","display_name":"Computational Mechanics"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/pesq","display_name":"PESQ","score":0.9027036428451538},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8017902374267578},{"id":"https://openalex.org/keywords/convolutional-neural-network","display_name":"Convolutional neural network","score":0.6958138346672058},{"id":"https://openalex.org/keywords/speech-enhancement","display_name":"Speech enhancement","score":0.6919642686843872},{"id":"https://openalex.org/keywords/noise","display_name":"Noise (video)","score":0.6349457502365112},{"id":"https://openalex.org/keywords/generalization","display_name":"Generalization","score":0.6079509854316711},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.5653362274169922},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5468730926513672},{"id":"https://openalex.org/keywords/perceptron","display_name":"Perceptron","score":0.5234776735305786},{"id":"https://openalex.org/keywords/recurrent-neural-network","display_name":"Recurrent neural network","score":0.5128982067108154},{"id":"https://openalex.org/keywords/exploit","display_name":"Exploit","score":0.41356968879699707},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.3953242897987366},{"id":"https://openalex.org/keywords/artificial-neural-network","display_name":"Artificial neural network","score":0.3526727855205536},{"id":"https://openalex.org/keywords/noise-reduction","display_name":"Noise reduction","score":0.23798835277557373},{"id":"https://openalex.org/keywords/image","display_name":"Image (mathematics)","score":0.08026084303855896},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.056356608867645264}],"concepts":[{"id":"https://openalex.org/C103734657","wikidata":"https://www.wikidata.org/wiki/Q2739975","display_name":"PESQ","level":4,"score":0.9027036428451538},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8017902374267578},{"id":"https://openalex.org/C81363708","wikidata":"https://www.wikidata.org/wiki/Q17084460","display_name":"Convolutional neural network","level":2,"score":0.6958138346672058},{"id":"https://openalex.org/C2776182073","wikidata":"https://www.wikidata.org/wiki/Q7575395","display_name":"Speech enhancement","level":3,"score":0.6919642686843872},{"id":"https://openalex.org/C99498987","wikidata":"https://www.wikidata.org/wiki/Q2210247","display_name":"Noise (video)","level":3,"score":0.6349457502365112},{"id":"https://openalex.org/C177148314","wikidata":"https://www.wikidata.org/wiki/Q170084","display_name":"Generalization","level":2,"score":0.6079509854316711},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.5653362274169922},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5468730926513672},{"id":"https://openalex.org/C60908668","wikidata":"https://www.wikidata.org/wiki/Q690207","display_name":"Perceptron","level":3,"score":0.5234776735305786},{"id":"https://openalex.org/C147168706","wikidata":"https://www.wikidata.org/wiki/Q1457734","display_name":"Recurrent neural network","level":3,"score":0.5128982067108154},{"id":"https://openalex.org/C165696696","wikidata":"https://www.wikidata.org/wiki/Q11287","display_name":"Exploit","level":2,"score":0.41356968879699707},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.3953242897987366},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.3526727855205536},{"id":"https://openalex.org/C163294075","wikidata":"https://www.wikidata.org/wiki/Q581861","display_name":"Noise reduction","level":2,"score":0.23798835277557373},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.08026084303855896},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.056356608867645264},{"id":"https://openalex.org/C38652104","wikidata":"https://www.wikidata.org/wiki/Q3510521","display_name":"Computer security","level":1,"score":0.0},{"id":"https://openalex.org/C134306372","wikidata":"https://www.wikidata.org/wiki/Q7754","display_name":"Mathematical analysis","level":1,"score":0.0}],"mesh":[],"locations_count":4,"locations":[{"id":"doi:10.1109/icassp.2018.8462155","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp.2018.8462155","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2018 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"},{"id":"pmh:oai:arXiv.org:1805.00579","is_oa":true,"landing_page_url":"http://arxiv.org/abs/1805.00579","pdf_url":"https://arxiv.org/pdf/1805.00579","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"text"},{"id":"mag:2789131247","is_oa":true,"landing_page_url":"http://export.arxiv.org/pdf/1805.00579","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"arXiv (Cornell University)","raw_type":null},{"id":"doi:10.48550/arxiv.1805.00579","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.1805.00579","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:1805.00579","is_oa":true,"landing_page_url":"http://arxiv.org/abs/1805.00579","pdf_url":"https://arxiv.org/pdf/1805.00579","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"text"},"sustainable_development_goals":[{"score":0.5299999713897705,"display_name":"Peace, Justice and strong institutions","id":"https://metadata.un.org/sdg/16"}],"awards":[],"funders":[],"has_content":{"pdf":true,"grobid_xml":true},"content_urls":{"pdf":"https://content.openalex.org/works/W2789131247.pdf","grobid_xml":"https://content.openalex.org/works/W2789131247.grobid-xml"},"referenced_works_count":27,"referenced_works":["https://openalex.org/W6908809","https://openalex.org/W1495679096","https://openalex.org/W1538759723","https://openalex.org/W1603327663","https://openalex.org/W2044893557","https://openalex.org/W2064675550","https://openalex.org/W2078528584","https://openalex.org/W2091432990","https://openalex.org/W2105393299","https://openalex.org/W2121973264","https://openalex.org/W2128653836","https://openalex.org/W2137983211","https://openalex.org/W2160815625","https://openalex.org/W2193413348","https://openalex.org/W2290318471","https://openalex.org/W2394932179","https://openalex.org/W2405774341","https://openalex.org/W2486913545","https://openalex.org/W2507424718","https://openalex.org/W2747161606","https://openalex.org/W4253928870","https://openalex.org/W6632305550","https://openalex.org/W6679009796","https://openalex.org/W6687566353","https://openalex.org/W6696830301","https://openalex.org/W6711962127","https://openalex.org/W6713658392"],"related_works":["https://openalex.org/W2962843322","https://openalex.org/W3003302463","https://openalex.org/W2791347279","https://openalex.org/W2969643681","https://openalex.org/W2984203607","https://openalex.org/W2999733888","https://openalex.org/W2891072262","https://openalex.org/W3006411128","https://openalex.org/W3015844538","https://openalex.org/W2791521493","https://openalex.org/W3109018774","https://openalex.org/W3165776651","https://openalex.org/W3177933256","https://openalex.org/W3010074710","https://openalex.org/W1995562189","https://openalex.org/W2232263742","https://openalex.org/W2940064443","https://openalex.org/W2787110826","https://openalex.org/W2516608830","https://openalex.org/W2964292098"],"abstract_inverted_index":{"We":[0],"propose":[1],"an":[2],"end-to-end":[3],"model":[4,16,81,86,111],"based":[5],"on":[6,95,102,121,126],"convolutional":[7,48],"and":[8,20,49,67,91,98,124],"recurrent":[9,50],"neural":[10,51],"networks":[11],"for":[12],"speech":[13,75],"enhancement.":[14],"Our":[15],"is":[17,88],"purely":[18],"data-driven":[19],"does":[21],"not":[22],"make":[23],"any":[24],"assumptions":[25],"about":[26],"the":[27,30,33,65,78],"type":[28],"or":[29],"stationarity":[31],"of":[32,74,80],"noise.":[34,100,128],"In":[35],"contrast":[36],"to":[37,59,119],"existing":[38,113],"methods":[39],"that":[40,87,109],"use":[41],"multilayer":[42],"perceptrons":[43],"(MLPs),":[44],"we":[45,83,107],"employ":[46],"both":[47,64,96],"network":[52],"architectures.":[53],"Thus,":[54],"our":[55,110],"approach":[56],"allows":[57],"us":[58],"exploit":[60],"local":[61],"structures":[62],"in":[63],"frequency":[66],"temporal":[68],"domains.":[69],"By":[70],"incorporating":[71],"prior":[72],"knowledge":[73],"signals":[76],"into":[77],"design":[79],"structures,":[82],"build":[84],"a":[85],"more":[89],"data-efficient":[90],"achieves":[92],"better":[93],"generalization":[94],"seen":[97,122],"unseen":[99,127],"Based":[101],"experiments":[103],"with":[104],"synthetic":[105],"data,":[106],"demonstrate":[108],"outperforms":[112],"methods,":[114],"improving":[115],"PESQ":[116],"by":[117],"up":[118],"0.6":[120],"noise":[123],"0.64":[125]},"counts_by_year":[{"year":2024,"cited_by_count":1},{"year":2023,"cited_by_count":3},{"year":2022,"cited_by_count":4},{"year":2021,"cited_by_count":1},{"year":2020,"cited_by_count":1},{"year":2019,"cited_by_count":2},{"year":2018,"cited_by_count":1}],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2025-10-10T00:00:00"}
