{"id":"https://openalex.org/W3016056257","doi":"https://doi.org/10.1109/lsp.2020.3040693","title":"WaveCRN: An Efficient Convolutional Recurrent Neural Network for End-to-End Speech Enhancement","display_name":"WaveCRN: An Efficient Convolutional Recurrent Neural Network for End-to-End Speech Enhancement","publication_year":2020,"publication_date":"2020-01-01","ids":{"openalex":"https://openalex.org/W3016056257","doi":"https://doi.org/10.1109/lsp.2020.3040693","mag":"3016056257"},"language":"en","primary_location":{"id":"doi:10.1109/lsp.2020.3040693","is_oa":true,"landing_page_url":"https://doi.org/10.1109/lsp.2020.3040693","pdf_url":"https://ieeexplore.ieee.org/ielx7/97/4358004/09272838.pdf","source":{"id":"https://openalex.org/S120629676","display_name":"IEEE Signal Processing Letters","issn_l":"1070-9908","issn":["1070-9908","1558-2361"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Signal Processing Letters","raw_type":"journal-article"},"type":"article","indexed_in":["arxiv","crossref"],"open_access":{"is_oa":true,"oa_status":"hybrid","oa_url":"https://ieeexplore.ieee.org/ielx7/97/4358004/09272838.pdf","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":null,"display_name":"Tsun-An Hsieh","orcid":null},"institutions":[{"id":"https://openalex.org/I4210086894","display_name":"Research Center for Information Technology Innovation, Academia Sinica","ror":"https://ror.org/000zgvm20","country_code":"TW","type":"facility","lineage":["https://openalex.org/I4210086894","https://openalex.org/I84653119"]}],"countries":["TW"],"is_corresponding":true,"raw_author_name":"Tsun-An Hsieh","raw_affiliation_strings":["Research Center for Information Technology Innovation, Academia Sinica, Taipei, Taiwan"],"affiliations":[{"raw_affiliation_string":"Research Center for Information Technology Innovation, Academia Sinica, Taipei, Taiwan","institution_ids":["https://openalex.org/I4210086894"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Hsin-Min Wang","orcid":"https://orcid.org/0000-0003-3599-5071"},"institutions":[{"id":"https://openalex.org/I4210098366","display_name":"Institute of Information Science, Academia Sinica","ror":"https://ror.org/00z83z196","country_code":"TW","type":"facility","lineage":["https://openalex.org/I4210098366","https://openalex.org/I84653119"]}],"countries":["TW"],"is_corresponding":false,"raw_author_name":"Hsin-Min Wang","raw_affiliation_strings":["Institute of Information Science, Academia Sinica, Taipei, Taiwan"],"affiliations":[{"raw_affiliation_string":"Institute of Information Science, Academia Sinica, Taipei, Taiwan","institution_ids":["https://openalex.org/I4210098366"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Xugang Lu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Xugang Lu","raw_affiliation_strings":["NICT, Koganei, Japan"],"affiliations":[{"raw_affiliation_string":"NICT, Koganei, Japan","institution_ids":[]}]},{"author_position":"last","author":{"id":null,"display_name":"Yu Tsao","orcid":"https://orcid.org/0000-0001-6956-0418"},"institutions":[{"id":"https://openalex.org/I4210086894","display_name":"Research Center for Information Technology Innovation, Academia Sinica","ror":"https://ror.org/000zgvm20","country_code":"TW","type":"facility","lineage":["https://openalex.org/I4210086894","https://openalex.org/I84653119"]}],"countries":["TW"],"is_corresponding":false,"raw_author_name":"Yu Tsao","raw_affiliation_strings":["Research Center for Information Technology Innovation, Academia Sinica, Taipei, Taiwan"],"affiliations":[{"raw_affiliation_string":"Research Center for Information Technology Innovation, Academia Sinica, Taipei, Taiwan","institution_ids":["https://openalex.org/I4210086894"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":[],"corresponding_institution_ids":["https://openalex.org/I4210086894"],"apc_list":null,"apc_paid":null,"fwci":6.5532,"has_fulltext":true,"cited_by_count":74,"citation_normalized_percentile":{"value":0.9763182,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":99,"max":100},"biblio":{"volume":"27","issue":null,"first_page":"2149","last_page":"2153"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9426000118255615,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9426000118255615,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.0272000003606081,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10283","display_name":"Hearing Loss and Rehabilitation","score":0.013399999588727951,"subfield":{"id":"https://openalex.org/subfields/2805","display_name":"Cognitive Neuroscience"},"field":{"id":"https://openalex.org/fields/28","display_name":"Neuroscience"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/speech-enhancement","display_name":"Speech enhancement","score":0.7958999872207642},{"id":"https://openalex.org/keywords/convolutional-neural-network","display_name":"Convolutional neural network","score":0.625},{"id":"https://openalex.org/keywords/recurrent-neural-network","display_name":"Recurrent neural network","score":0.5351999998092651},{"id":"https://openalex.org/keywords/noise-reduction","display_name":"Noise reduction","score":0.5083000063896179},{"id":"https://openalex.org/keywords/feature","display_name":"Feature (linguistics)","score":0.4927999973297119},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.4860999882221222},{"id":"https://openalex.org/keywords/locality","display_name":"Locality","score":0.4471000134944916},{"id":"https://openalex.org/keywords/noise","display_name":"Noise (video)","score":0.421099990606308},{"id":"https://openalex.org/keywords/inference","display_name":"Inference","score":0.3677000105381012}],"concepts":[{"id":"https://openalex.org/C2776182073","wikidata":"https://www.wikidata.org/wiki/Q7575395","display_name":"Speech enhancement","level":3,"score":0.7958999872207642},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7818999886512756},{"id":"https://openalex.org/C81363708","wikidata":"https://www.wikidata.org/wiki/Q17084460","display_name":"Convolutional neural network","level":2,"score":0.625},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.6068999767303467},{"id":"https://openalex.org/C147168706","wikidata":"https://www.wikidata.org/wiki/Q1457734","display_name":"Recurrent neural network","level":3,"score":0.5351999998092651},{"id":"https://openalex.org/C163294075","wikidata":"https://www.wikidata.org/wiki/Q581861","display_name":"Noise reduction","level":2,"score":0.5083000063896179},{"id":"https://openalex.org/C2776401178","wikidata":"https://www.wikidata.org/wiki/Q12050496","display_name":"Feature (linguistics)","level":2,"score":0.4927999973297119},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.4860999882221222},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4729999899864197},{"id":"https://openalex.org/C2779808786","wikidata":"https://www.wikidata.org/wiki/Q6664603","display_name":"Locality","level":2,"score":0.4471000134944916},{"id":"https://openalex.org/C99498987","wikidata":"https://www.wikidata.org/wiki/Q2210247","display_name":"Noise (video)","level":3,"score":0.421099990606308},{"id":"https://openalex.org/C2776214188","wikidata":"https://www.wikidata.org/wiki/Q408386","display_name":"Inference","level":2,"score":0.3677000105381012},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.3666999936103821},{"id":"https://openalex.org/C2777402240","wikidata":"https://www.wikidata.org/wiki/Q6783436","display_name":"Masking (illustration)","level":2,"score":0.35910001397132874},{"id":"https://openalex.org/C111335779","wikidata":"https://www.wikidata.org/wiki/Q3454686","display_name":"Reduction (mathematics)","level":2,"score":0.3587000072002411},{"id":"https://openalex.org/C61328038","wikidata":"https://www.wikidata.org/wiki/Q3358061","display_name":"Speech processing","level":2,"score":0.35519999265670776},{"id":"https://openalex.org/C29265498","wikidata":"https://www.wikidata.org/wiki/Q7047719","display_name":"Noise measurement","level":3,"score":0.334199994802475},{"id":"https://openalex.org/C108583219","wikidata":"https://www.wikidata.org/wiki/Q197536","display_name":"Deep learning","level":2,"score":0.334199994802475},{"id":"https://openalex.org/C52622490","wikidata":"https://www.wikidata.org/wiki/Q1026626","display_name":"Feature extraction","level":2,"score":0.3287999927997589},{"id":"https://openalex.org/C45347329","wikidata":"https://www.wikidata.org/wiki/Q5166604","display_name":"Convolution (computer science)","level":3,"score":0.32330000400543213},{"id":"https://openalex.org/C13895895","wikidata":"https://www.wikidata.org/wiki/Q3270773","display_name":"Speech coding","level":2,"score":0.32170000672340393},{"id":"https://openalex.org/C2780586882","wikidata":"https://www.wikidata.org/wiki/Q7520643","display_name":"Simple (philosophy)","level":2,"score":0.28999999165534973},{"id":"https://openalex.org/C137293760","wikidata":"https://www.wikidata.org/wiki/Q3621696","display_name":"Language model","level":2,"score":0.2815999984741211},{"id":"https://openalex.org/C189950617","wikidata":"https://www.wikidata.org/wiki/Q937228","display_name":"Property (philosophy)","level":2,"score":0.2782000005245209},{"id":"https://openalex.org/C179799912","wikidata":"https://www.wikidata.org/wiki/Q205084","display_name":"Computational complexity theory","level":2,"score":0.2623000144958496}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1109/lsp.2020.3040693","is_oa":true,"landing_page_url":"https://doi.org/10.1109/lsp.2020.3040693","pdf_url":"https://ieeexplore.ieee.org/ielx7/97/4358004/09272838.pdf","source":{"id":"https://openalex.org/S120629676","display_name":"IEEE Signal Processing Letters","issn_l":"1070-9908","issn":["1070-9908","1558-2361"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Signal Processing Letters","raw_type":"journal-article"},{"id":"pmh:oai:arXiv.org:2004.04098","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2004.04098","pdf_url":"https://arxiv.org/pdf/2004.04098","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"}],"best_oa_location":{"id":"doi:10.1109/lsp.2020.3040693","is_oa":true,"landing_page_url":"https://doi.org/10.1109/lsp.2020.3040693","pdf_url":"https://ieeexplore.ieee.org/ielx7/97/4358004/09272838.pdf","source":{"id":"https://openalex.org/S120629676","display_name":"IEEE Signal Processing Letters","issn_l":"1070-9908","issn":["1070-9908","1558-2361"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Signal Processing Letters","raw_type":"journal-article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":true,"grobid_xml":true},"content_urls":{"pdf":"https://content.openalex.org/works/W3016056257.pdf","grobid_xml":"https://content.openalex.org/works/W3016056257.grobid-xml"},"referenced_works_count":49,"referenced_works":["https://openalex.org/W1552314771","https://openalex.org/W1897240248","https://openalex.org/W1963950237","https://openalex.org/W1983108229","https://openalex.org/W2044893557","https://openalex.org/W2094721231","https://openalex.org/W2141998673","https://openalex.org/W2144404214","https://openalex.org/W2160215673","https://openalex.org/W2309400744","https://openalex.org/W2516342150","https://openalex.org/W2550397165","https://openalex.org/W2603567530","https://openalex.org/W2605589342","https://openalex.org/W2609317876","https://openalex.org/W2649558613","https://openalex.org/W2747161606","https://openalex.org/W2774389566","https://openalex.org/W2802304149","https://openalex.org/W2889134433","https://openalex.org/W2889442120","https://openalex.org/W2889500840","https://openalex.org/W2937484199","https://openalex.org/W2937525188","https://openalex.org/W2962843322","https://openalex.org/W2962866211","https://openalex.org/W2963045393","https://openalex.org/W2963103134","https://openalex.org/W2963174729","https://openalex.org/W2963341071","https://openalex.org/W2963828919","https://openalex.org/W2969917057","https://openalex.org/W2971417062","https://openalex.org/W2971758476","https://openalex.org/W2972443522","https://openalex.org/W2974481323","https://openalex.org/W2991361823","https://openalex.org/W2998678989","https://openalex.org/W3011129471","https://openalex.org/W4253928870","https://openalex.org/W6679009796","https://openalex.org/W6684683940","https://openalex.org/W6696830301","https://openalex.org/W6712560600","https://openalex.org/W6713117669","https://openalex.org/W6713658392","https://openalex.org/W6729059855","https://openalex.org/W6766967953","https://openalex.org/W6773839719"],"related_works":[],"abstract_inverted_index":{"Due":[0],"to":[1,20,62,96,110,141,182,218],"the":[2,22,25,28,98,112,116,148,163,167,174,178,183,208,211],"simple":[3,105],"design":[4],"pipeline,":[5],"end-to-end":[6],"(E2E)":[7],"neural":[8,83,123],"models":[9,48,79],"for":[10,49],"speech":[11,34,99,192,198,202],"enhancement":[12,161],"(SE)":[13],"have":[14],"attracted":[15],"great":[16],"interest.":[17],"In":[18,65,139],"order":[19,140],"improve":[21],"performance":[23],"of":[24,33,115],"E2E":[26,47,72],"model,":[27,74],"local":[29],"and":[30,102,125,200,210,227],"sequential":[31,113],"properties":[32,52],"should":[35],"be":[36,63,129],"efficiently":[37,130],"taken":[38],"into":[39],"account":[40],"when":[41],"modelling.":[42],"However,":[43],"in":[44,132,147,166,191],"most":[45],"current":[46],"SE,":[50],"these":[51],"are":[53,59],"either":[54],"not":[55],"fully":[56],"considered":[57],"or":[58,86],"too":[60],"complex":[61],"realized.":[64],"this":[66,170],"letter,":[67],"we":[68,151],"propose":[69],"an":[70],"efficient":[71],"SE":[73],"termed":[75],"WaveCRN.":[76],"Compared":[77],"with":[78,134,207,222],"based":[80],"on":[81,162,197],"convolutional":[82],"networks":[84,124],"(CNN)":[85],"long":[87],"short-term":[88],"memory":[89],"(LSTM),":[90],"WaveCRN":[91,215],"uses":[92],"a":[93,103,153],"CNN":[94],"module":[95,109],"capture":[97],"locality":[100,117],"features":[101],"stacked":[104],"recurrent":[106,122],"units":[107],"(SRU)":[108],"model":[111,137,225],"property":[114],"features.":[118],"Different":[119],"from":[120,173],"conventional":[121],"LSTM,":[126],"SRU":[127,209],"can":[128],"parallelized":[131],"calculation,":[133],"even":[135],"fewer":[136],"parameters.":[138],"more":[142],"effectively":[143],"suppress":[144],"noise":[145],"components":[146],"noisy":[149,184],"speech,":[150],"derive":[152],"novel":[154],"restricted":[155,212],"feature":[156,164,213],"masking":[157],"approach,":[158],"which":[159,187],"performs":[160,216],"maps":[165],"hidden":[168],"layers;":[169],"is":[171,188],"different":[172],"approaches":[175,221],"that":[176,206],"apply":[177],"estimated":[179],"ratio":[180],"mask":[181],"spectral":[185],"features,":[186],"commonly":[189],"used":[190],"separation":[193],"methods.":[194],"Experimental":[195],"results":[196],"denoising":[199],"compressed":[201],"restoration":[203],"tasks":[204],"confirm":[205],"map,":[214],"comparably":[217],"other":[219],"state-of-the-art":[220],"notably":[223],"reduced":[224],"complexity":[226],"inference":[228],"time.":[229]},"counts_by_year":[{"year":2026,"cited_by_count":3},{"year":2025,"cited_by_count":15},{"year":2024,"cited_by_count":13},{"year":2023,"cited_by_count":17},{"year":2022,"cited_by_count":15},{"year":2021,"cited_by_count":11}],"updated_date":"2026-03-27T14:29:43.386196","created_date":"2020-04-17T00:00:00"}
