{"id":"https://openalex.org/W3100464335","doi":"https://doi.org/10.1109/slt48900.2021.9383624","title":"Dual Application of Speech Enhancement for Automatic Speech Recognition","display_name":"Dual Application of Speech Enhancement for Automatic Speech Recognition","publication_year":2021,"publication_date":"2021-01-19","ids":{"openalex":"https://openalex.org/W3100464335","doi":"https://doi.org/10.1109/slt48900.2021.9383624","mag":"3100464335"},"language":"en","primary_location":{"id":"doi:10.1109/slt48900.2021.9383624","is_oa":false,"landing_page_url":"https://doi.org/10.1109/slt48900.2021.9383624","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2021 IEEE Spoken Language Technology Workshop (SLT)","raw_type":"proceedings-article"},"type":"preprint","indexed_in":["arxiv","crossref","datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/2011.03840","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5050492887","display_name":"Ashutosh Pandey","orcid":"https://orcid.org/0000-0002-3352-7453"},"institutions":[{"id":"https://openalex.org/I52357470","display_name":"The Ohio State University","ror":"https://ror.org/00rs6vg23","country_code":"US","type":"education","lineage":["https://openalex.org/I52357470"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Ashutosh Pandey","raw_affiliation_strings":["The Ohio State University, USA","Ohio State University"],"affiliations":[{"raw_affiliation_string":"The Ohio State University, USA","institution_ids":["https://openalex.org/I52357470"]},{"raw_affiliation_string":"Ohio State University","institution_ids":["https://openalex.org/I52357470"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5069296252","display_name":"Chunxi Liu","orcid":"https://orcid.org/0000-0001-5441-9374"},"institutions":[{"id":"https://openalex.org/I4210114444","display_name":"Meta (United States)","ror":"https://ror.org/01zbnvs85","country_code":"US","type":"company","lineage":["https://openalex.org/I4210114444"]},{"id":"https://openalex.org/I2252078561","display_name":"Meta (Israel)","ror":"https://ror.org/02388em19","country_code":"IL","type":"company","lineage":["https://openalex.org/I2252078561","https://openalex.org/I4210114444"]}],"countries":["IL","US"],"is_corresponding":false,"raw_author_name":"Chunxi Liu","raw_affiliation_strings":["Facebook AI, USA","Facebook"],"affiliations":[{"raw_affiliation_string":"Facebook AI, USA","institution_ids":["https://openalex.org/I4210114444"]},{"raw_affiliation_string":"Facebook","institution_ids":["https://openalex.org/I2252078561"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100377647","display_name":"Yun Wang","orcid":"https://orcid.org/0000-0002-9732-9245"},"institutions":[{"id":"https://openalex.org/I2252078561","display_name":"Meta (Israel)","ror":"https://ror.org/02388em19","country_code":"IL","type":"company","lineage":["https://openalex.org/I2252078561","https://openalex.org/I4210114444"]},{"id":"https://openalex.org/I4210114444","display_name":"Meta (United States)","ror":"https://ror.org/01zbnvs85","country_code":"US","type":"company","lineage":["https://openalex.org/I4210114444"]}],"countries":["IL","US"],"is_corresponding":false,"raw_author_name":"Yun Wang","raw_affiliation_strings":["Facebook AI, USA","Facebook"],"affiliations":[{"raw_affiliation_string":"Facebook AI, USA","institution_ids":["https://openalex.org/I4210114444"]},{"raw_affiliation_string":"Facebook","institution_ids":["https://openalex.org/I2252078561"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5051165898","display_name":"Yatharth Saraf","orcid":null},"institutions":[{"id":"https://openalex.org/I4210114444","display_name":"Meta (United States)","ror":"https://ror.org/01zbnvs85","country_code":"US","type":"company","lineage":["https://openalex.org/I4210114444"]},{"id":"https://openalex.org/I2252078561","display_name":"Meta (Israel)","ror":"https://ror.org/02388em19","country_code":"IL","type":"company","lineage":["https://openalex.org/I2252078561","https://openalex.org/I4210114444"]}],"countries":["IL","US"],"is_corresponding":false,"raw_author_name":"Yatharth Saraf","raw_affiliation_strings":["Facebook AI, USA","Facebook"],"affiliations":[{"raw_affiliation_string":"Facebook AI, USA","institution_ids":["https://openalex.org/I4210114444"]},{"raw_affiliation_string":"Facebook","institution_ids":["https://openalex.org/I2252078561"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5050492887"],"corresponding_institution_ids":["https://openalex.org/I52357470"],"apc_list":null,"apc_paid":null,"fwci":0.3078,"has_fulltext":true,"cited_by_count":2,"citation_normalized_percentile":{"value":0.50537057,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":94},"biblio":{"volume":null,"issue":null,"first_page":"223","last_page":"228"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9995999932289124,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9976999759674072,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8007251620292664},{"id":"https://openalex.org/keywords/preprocessor","display_name":"Preprocessor","score":0.7117912173271179},{"id":"https://openalex.org/keywords/exploit","display_name":"Exploit","score":0.6673691272735596},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.6574342250823975},{"id":"https://openalex.org/keywords/speech-enhancement","display_name":"Speech enhancement","score":0.641383945941925},{"id":"https://openalex.org/keywords/convolutional-neural-network","display_name":"Convolutional neural network","score":0.5096588134765625},{"id":"https://openalex.org/keywords/consistency","display_name":"Consistency (knowledge bases)","score":0.48391515016555786},{"id":"https://openalex.org/keywords/feature","display_name":"Feature (linguistics)","score":0.44480663537979126},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.42183488607406616},{"id":"https://openalex.org/keywords/recurrent-neural-network","display_name":"Recurrent neural network","score":0.4192155599594116},{"id":"https://openalex.org/keywords/data-pre-processing","display_name":"Data pre-processing","score":0.4138893485069275},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.3567969799041748},{"id":"https://openalex.org/keywords/artificial-neural-network","display_name":"Artificial neural network","score":0.3059040606021881},{"id":"https://openalex.org/keywords/noise-reduction","display_name":"Noise reduction","score":0.10954833030700684}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8007251620292664},{"id":"https://openalex.org/C34736171","wikidata":"https://www.wikidata.org/wiki/Q918333","display_name":"Preprocessor","level":2,"score":0.7117912173271179},{"id":"https://openalex.org/C165696696","wikidata":"https://www.wikidata.org/wiki/Q11287","display_name":"Exploit","level":2,"score":0.6673691272735596},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.6574342250823975},{"id":"https://openalex.org/C2776182073","wikidata":"https://www.wikidata.org/wiki/Q7575395","display_name":"Speech enhancement","level":3,"score":0.641383945941925},{"id":"https://openalex.org/C81363708","wikidata":"https://www.wikidata.org/wiki/Q17084460","display_name":"Convolutional neural network","level":2,"score":0.5096588134765625},{"id":"https://openalex.org/C2776436953","wikidata":"https://www.wikidata.org/wiki/Q5163215","display_name":"Consistency (knowledge bases)","level":2,"score":0.48391515016555786},{"id":"https://openalex.org/C2776401178","wikidata":"https://www.wikidata.org/wiki/Q12050496","display_name":"Feature (linguistics)","level":2,"score":0.44480663537979126},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.42183488607406616},{"id":"https://openalex.org/C147168706","wikidata":"https://www.wikidata.org/wiki/Q1457734","display_name":"Recurrent neural network","level":3,"score":0.4192155599594116},{"id":"https://openalex.org/C10551718","wikidata":"https://www.wikidata.org/wiki/Q5227332","display_name":"Data pre-processing","level":2,"score":0.4138893485069275},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.3567969799041748},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.3059040606021881},{"id":"https://openalex.org/C163294075","wikidata":"https://www.wikidata.org/wiki/Q581861","display_name":"Noise reduction","level":2,"score":0.10954833030700684},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.0},{"id":"https://openalex.org/C38652104","wikidata":"https://www.wikidata.org/wiki/Q3510521","display_name":"Computer security","level":1,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0}],"mesh":[],"locations_count":5,"locations":[{"id":"doi:10.1109/slt48900.2021.9383624","is_oa":false,"landing_page_url":"https://doi.org/10.1109/slt48900.2021.9383624","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2021 IEEE Spoken Language Technology Workshop (SLT)","raw_type":"proceedings-article"},{"id":"pmh:oai:arXiv.org:2011.03840","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2011.03840","pdf_url":"https://arxiv.org/pdf/2011.03840","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"text"},{"id":"mag:3100464335","is_oa":true,"landing_page_url":"http://export.arxiv.org/pdf/2011.03840","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"arXiv (Cornell University)","raw_type":null},{"id":"doi:10.48550/arxiv.2011.03840","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2011.03840","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"},{"id":"doi:10.17023/1k5h-kn47","is_oa":true,"landing_page_url":"https://doi.org/10.17023/1k5h-kn47","pdf_url":null,"source":{"id":"https://openalex.org/S7407051697","display_name":"IEEE RESOURCE CENTERS","issn_l":null,"issn":[],"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:2011.03840","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2011.03840","pdf_url":"https://arxiv.org/pdf/2011.03840","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"text"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":true,"grobid_xml":true},"content_urls":{"pdf":"https://content.openalex.org/works/W3100464335.pdf","grobid_xml":"https://content.openalex.org/works/W3100464335.grobid-xml"},"referenced_works_count":43,"referenced_works":["https://openalex.org/W1828163288","https://openalex.org/W2033256038","https://openalex.org/W2141998673","https://openalex.org/W2291877678","https://openalex.org/W2327501763","https://openalex.org/W2545177271","https://openalex.org/W2593116425","https://openalex.org/W2640112133","https://openalex.org/W2746192915","https://openalex.org/W2774389566","https://openalex.org/W2885185669","https://openalex.org/W2936774411","https://openalex.org/W2938901746","https://openalex.org/W2962866211","https://openalex.org/W2963250244","https://openalex.org/W2963414781","https://openalex.org/W2964058413","https://openalex.org/W2964121744","https://openalex.org/W2966212213","https://openalex.org/W2972436155","https://openalex.org/W2982471419","https://openalex.org/W2990501692","https://openalex.org/W2991361823","https://openalex.org/W3007227084","https://openalex.org/W3008898571","https://openalex.org/W3015197852","https://openalex.org/W3015280134","https://openalex.org/W3015312544","https://openalex.org/W3015315932","https://openalex.org/W3029982911","https://openalex.org/W3032514799","https://openalex.org/W3032859754","https://openalex.org/W3049430014","https://openalex.org/W3094911989","https://openalex.org/W3095311338","https://openalex.org/W3095962459","https://openalex.org/W6631190155","https://openalex.org/W6638749077","https://openalex.org/W6734260513","https://openalex.org/W6747398299","https://openalex.org/W6769564786","https://openalex.org/W6770425081","https://openalex.org/W6775557069"],"related_works":["https://openalex.org/W3142252347","https://openalex.org/W2998616931","https://openalex.org/W1897240248","https://openalex.org/W2771275309","https://openalex.org/W2760287881","https://openalex.org/W3109196171","https://openalex.org/W2401089611","https://openalex.org/W2606601002","https://openalex.org/W2996639726","https://openalex.org/W3133488653","https://openalex.org/W3011199534","https://openalex.org/W2405486865","https://openalex.org/W2891980359","https://openalex.org/W2971198148","https://openalex.org/W2786510951","https://openalex.org/W3132775009","https://openalex.org/W2062576667","https://openalex.org/W3135771040","https://openalex.org/W99561634","https://openalex.org/W3162646409"],"abstract_inverted_index":{"In":[0,50,77],"this":[1],"work,":[2],"we":[3,57,86],"exploit":[4,58],"speech":[5,31,79,121],"enhancement":[6,80,122,128],"for":[7,26,37,53],"improving":[8],"a":[9,20,42,47,59,88,105],"re-current":[10],"neural":[11],"network":[12,24],"transducer":[13],"(RNN-T)":[14],"based":[15,30,62,92,123,129],"ASR":[16,38,54,70,84],"system.":[17],"We":[18,99],"employ":[19],"dense":[21],"convolutional":[22],"recurrent":[23],"(DCRN)":[25],"complex":[27],"spectral":[28],"mapping":[29],"enhancement,":[32],"and":[33,46,74,96,112,131],"find":[34],"it":[35,52],"helpful":[36],"in":[39],"two":[40],"ways:":[41],"data":[43,55,124],"augmentation":[44],"technique,":[45],"preprocessing":[48],"frontend.":[49],"using":[51,78],"augmentation,":[56,125],"KL":[60],"divergence":[61],"consistency":[63],"loss":[64],"that":[65],"is":[66],"computed":[67],"between":[68],"the":[69],"outputs":[71],"of":[72,118],"original":[73],"enhanced":[75],"utterances.":[76],"as":[81],"an":[82,114],"effective":[83],"frontend,":[85],"propose":[87],"three-step":[89],"training":[90],"scheme":[91],"on":[93,104],"model":[94],"pretraining":[95],"feature":[97],"selection.":[98],"evaluate":[100],"our":[101],"proposed":[102],"techniques":[103],"challenging":[106],"social":[107],"media":[108],"English":[109],"video":[110],"dataset,":[111],"achieve":[113],"average":[115],"relative":[116],"improvement":[117],"11.2%":[119],"with":[120,127],"8.3%":[126],"preprocessing,":[130],"13.4%":[132],"when":[133],"combining":[134],"both.":[135]},"counts_by_year":[{"year":2022,"cited_by_count":1},{"year":2021,"cited_by_count":1}],"updated_date":"2026-03-10T16:38:18.471706","created_date":"2025-10-10T00:00:00"}
