{"id":"https://openalex.org/W4319586210","doi":"https://doi.org/10.1109/iscslp57327.2022.10037796","title":"Speech-enhanced and Noise-aware Networks for Robust Speech Recognition","display_name":"Speech-enhanced and Noise-aware Networks for Robust Speech Recognition","publication_year":2022,"publication_date":"2022-12-11","ids":{"openalex":"https://openalex.org/W4319586210","doi":"https://doi.org/10.1109/iscslp57327.2022.10037796"},"language":"en","primary_location":{"id":"doi:10.1109/iscslp57327.2022.10037796","is_oa":false,"landing_page_url":"https://doi.org/10.1109/iscslp57327.2022.10037796","pdf_url":null,"source":{"id":"https://openalex.org/S4363607181","display_name":"2022 13th International Symposium on Chinese Spoken Language Processing (ISCSLP)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2022 13th International Symposium on Chinese Spoken Language Processing (ISCSLP)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5048338308","display_name":"Hung-Shin Lee","orcid":"https://orcid.org/0000-0001-7044-9434"},"institutions":[{"id":"https://openalex.org/I4210098366","display_name":"Institute of Information Science, Academia Sinica","ror":"https://ror.org/00z83z196","country_code":"TW","type":"facility","lineage":["https://openalex.org/I4210098366","https://openalex.org/I84653119"]},{"id":"https://openalex.org/I4210086453","display_name":"Institute of Political Science, Academia Sinica","ror":"https://ror.org/001agqs13","country_code":"TW","type":"facility","lineage":["https://openalex.org/I4210086453","https://openalex.org/I84653119"]}],"countries":["TW"],"is_corresponding":true,"raw_author_name":"Hung-Shin Lee","raw_affiliation_strings":["Institute of Information Science,Academia Sinica","Academia Sinica, Institute of Information Science"],"affiliations":[{"raw_affiliation_string":"Institute of Information Science,Academia Sinica","institution_ids":["https://openalex.org/I4210086453","https://openalex.org/I4210098366"]},{"raw_affiliation_string":"Academia Sinica, Institute of Information Science","institution_ids":["https://openalex.org/I4210098366"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5051612838","display_name":"Pin\u2010Yuan Chen","orcid":"https://orcid.org/0000-0003-3324-0695"},"institutions":[{"id":"https://openalex.org/I4210098366","display_name":"Institute of Information Science, Academia Sinica","ror":"https://ror.org/00z83z196","country_code":"TW","type":"facility","lineage":["https://openalex.org/I4210098366","https://openalex.org/I84653119"]},{"id":"https://openalex.org/I4210086453","display_name":"Institute of Political Science, Academia Sinica","ror":"https://ror.org/001agqs13","country_code":"TW","type":"facility","lineage":["https://openalex.org/I4210086453","https://openalex.org/I84653119"]}],"countries":["TW"],"is_corresponding":false,"raw_author_name":"Pin-Yuan Chen","raw_affiliation_strings":["Institute of Information Science,Academia Sinica","Academia Sinica, Institute of Information Science"],"affiliations":[{"raw_affiliation_string":"Institute of Information Science,Academia Sinica","institution_ids":["https://openalex.org/I4210086453","https://openalex.org/I4210098366"]},{"raw_affiliation_string":"Academia Sinica, Institute of Information Science","institution_ids":["https://openalex.org/I4210098366"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5002404521","display_name":"Yao-Fei Cheng","orcid":null},"institutions":[{"id":"https://openalex.org/I4210086453","display_name":"Institute of Political Science, Academia Sinica","ror":"https://ror.org/001agqs13","country_code":"TW","type":"facility","lineage":["https://openalex.org/I4210086453","https://openalex.org/I84653119"]},{"id":"https://openalex.org/I4210098366","display_name":"Institute of Information Science, Academia Sinica","ror":"https://ror.org/00z83z196","country_code":"TW","type":"facility","lineage":["https://openalex.org/I4210098366","https://openalex.org/I84653119"]}],"countries":["TW"],"is_corresponding":false,"raw_author_name":"Yao-Fei Cheng","raw_affiliation_strings":["Institute of Information Science,Academia Sinica","Academia Sinica, Institute of Information Science"],"affiliations":[{"raw_affiliation_string":"Institute of Information Science,Academia Sinica","institution_ids":["https://openalex.org/I4210086453","https://openalex.org/I4210098366"]},{"raw_affiliation_string":"Academia Sinica, Institute of Information Science","institution_ids":["https://openalex.org/I4210098366"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5044008055","display_name":"Yu Tsao","orcid":"https://orcid.org/0000-0001-6956-0418"},"institutions":[{"id":"https://openalex.org/I4210086894","display_name":"Research Center for Information Technology Innovation, Academia Sinica","ror":"https://ror.org/000zgvm20","country_code":"TW","type":"facility","lineage":["https://openalex.org/I4210086894","https://openalex.org/I84653119"]}],"countries":["TW"],"is_corresponding":false,"raw_author_name":"Yu Tsao","raw_affiliation_strings":["Research Center for Information Technology Innovation,Academia Sinica","Academia Sinica, Research Center for Information Technology Innovation"],"affiliations":[{"raw_affiliation_string":"Research Center for Information Technology Innovation,Academia Sinica","institution_ids":["https://openalex.org/I4210086894"]},{"raw_affiliation_string":"Academia Sinica, Research Center for Information Technology Innovation","institution_ids":["https://openalex.org/I4210086894"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5071214181","display_name":"Hsin\u2010Min Wang","orcid":"https://orcid.org/0000-0003-3599-5071"},"institutions":[{"id":"https://openalex.org/I4210098366","display_name":"Institute of Information Science, Academia Sinica","ror":"https://ror.org/00z83z196","country_code":"TW","type":"facility","lineage":["https://openalex.org/I4210098366","https://openalex.org/I84653119"]},{"id":"https://openalex.org/I4210086453","display_name":"Institute of Political Science, Academia Sinica","ror":"https://ror.org/001agqs13","country_code":"TW","type":"facility","lineage":["https://openalex.org/I4210086453","https://openalex.org/I84653119"]}],"countries":["TW"],"is_corresponding":false,"raw_author_name":"Hsin-Min Wang","raw_affiliation_strings":["Institute of Information Science,Academia Sinica","Academia Sinica, Institute of Information Science"],"affiliations":[{"raw_affiliation_string":"Institute of Information Science,Academia Sinica","institution_ids":["https://openalex.org/I4210086453","https://openalex.org/I4210098366"]},{"raw_affiliation_string":"Academia Sinica, Institute of Information Science","institution_ids":["https://openalex.org/I4210098366"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5048338308"],"corresponding_institution_ids":["https://openalex.org/I4210086453","https://openalex.org/I4210098366"],"apc_list":null,"apc_paid":null,"fwci":0.3682,"has_fulltext":false,"cited_by_count":3,"citation_normalized_percentile":{"value":0.55450237,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":96},"biblio":{"volume":null,"issue":null,"first_page":"145","last_page":"149"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9994999766349792,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8191714882850647},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.7981723546981812},{"id":"https://openalex.org/keywords/word-error-rate","display_name":"Word error rate","score":0.5752543807029724},{"id":"https://openalex.org/keywords/voice-activity-detection","display_name":"Voice activity detection","score":0.4771244525909424},{"id":"https://openalex.org/keywords/bigram","display_name":"Bigram","score":0.4703937768936157},{"id":"https://openalex.org/keywords/convolutional-neural-network","display_name":"Convolutional neural network","score":0.46738186478614807},{"id":"https://openalex.org/keywords/speech-coding","display_name":"Speech coding","score":0.44949811697006226},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.4441301226615906},{"id":"https://openalex.org/keywords/speech-enhancement","display_name":"Speech enhancement","score":0.4306556284427643},{"id":"https://openalex.org/keywords/decoding-methods","display_name":"Decoding methods","score":0.4302467107772827},{"id":"https://openalex.org/keywords/noise-reduction","display_name":"Noise reduction","score":0.420929878950119},{"id":"https://openalex.org/keywords/time-delay-neural-network","display_name":"Time delay neural network","score":0.41754770278930664},{"id":"https://openalex.org/keywords/trigram","display_name":"Trigram","score":0.39639604091644287},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.36579954624176025},{"id":"https://openalex.org/keywords/speech-processing","display_name":"Speech processing","score":0.3652191758155823},{"id":"https://openalex.org/keywords/artificial-neural-network","display_name":"Artificial neural network","score":0.35709887742996216},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.09313711524009705}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8191714882850647},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.7981723546981812},{"id":"https://openalex.org/C40969351","wikidata":"https://www.wikidata.org/wiki/Q3516228","display_name":"Word error rate","level":2,"score":0.5752543807029724},{"id":"https://openalex.org/C204201278","wikidata":"https://www.wikidata.org/wiki/Q1332614","display_name":"Voice activity detection","level":3,"score":0.4771244525909424},{"id":"https://openalex.org/C108757681","wikidata":"https://www.wikidata.org/wiki/Q2773912","display_name":"Bigram","level":3,"score":0.4703937768936157},{"id":"https://openalex.org/C81363708","wikidata":"https://www.wikidata.org/wiki/Q17084460","display_name":"Convolutional neural network","level":2,"score":0.46738186478614807},{"id":"https://openalex.org/C13895895","wikidata":"https://www.wikidata.org/wiki/Q3270773","display_name":"Speech coding","level":2,"score":0.44949811697006226},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4441301226615906},{"id":"https://openalex.org/C2776182073","wikidata":"https://www.wikidata.org/wiki/Q7575395","display_name":"Speech enhancement","level":3,"score":0.4306556284427643},{"id":"https://openalex.org/C57273362","wikidata":"https://www.wikidata.org/wiki/Q576722","display_name":"Decoding methods","level":2,"score":0.4302467107772827},{"id":"https://openalex.org/C163294075","wikidata":"https://www.wikidata.org/wiki/Q581861","display_name":"Noise reduction","level":2,"score":0.420929878950119},{"id":"https://openalex.org/C175202392","wikidata":"https://www.wikidata.org/wiki/Q2434543","display_name":"Time delay neural network","level":3,"score":0.41754770278930664},{"id":"https://openalex.org/C137546455","wikidata":"https://www.wikidata.org/wiki/Q3213474","display_name":"Trigram","level":2,"score":0.39639604091644287},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.36579954624176025},{"id":"https://openalex.org/C61328038","wikidata":"https://www.wikidata.org/wiki/Q3358061","display_name":"Speech processing","level":2,"score":0.3652191758155823},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.35709887742996216},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.09313711524009705}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/iscslp57327.2022.10037796","is_oa":false,"landing_page_url":"https://doi.org/10.1109/iscslp57327.2022.10037796","pdf_url":null,"source":{"id":"https://openalex.org/S4363607181","display_name":"2022 13th International Symposium on Chinese Spoken Language Processing (ISCSLP)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2022 13th International Symposium on Chinese Spoken Language Processing (ISCSLP)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/16","display_name":"Peace, Justice and strong institutions","score":0.8199999928474426}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":43,"referenced_works":["https://openalex.org/W2006129368","https://openalex.org/W2025768430","https://openalex.org/W2042141988","https://openalex.org/W2062164080","https://openalex.org/W2128653836","https://openalex.org/W2131342762","https://openalex.org/W2141411743","https://openalex.org/W2150769028","https://openalex.org/W2296167893","https://openalex.org/W2402146185","https://openalex.org/W2402201734","https://openalex.org/W2404019834","https://openalex.org/W2404126548","https://openalex.org/W2407080277","https://openalex.org/W2514722735","https://openalex.org/W2514741789","https://openalex.org/W2745772944","https://openalex.org/W2807791502","https://openalex.org/W2808939837","https://openalex.org/W2888867175","https://openalex.org/W2891756742","https://openalex.org/W2936774411","https://openalex.org/W2937170468","https://openalex.org/W2962959469","https://openalex.org/W2963341071","https://openalex.org/W2972320711","https://openalex.org/W2972412503","https://openalex.org/W2972555663","https://openalex.org/W2972664737","https://openalex.org/W3012383481","https://openalex.org/W3097573669","https://openalex.org/W3109196171","https://openalex.org/W3130351163","https://openalex.org/W4236328885","https://openalex.org/W4237168004","https://openalex.org/W6631362777","https://openalex.org/W6635900476","https://openalex.org/W6656414902","https://openalex.org/W6660953966","https://openalex.org/W6678809451","https://openalex.org/W6739901393","https://openalex.org/W6784855336","https://openalex.org/W6784933028"],"related_works":["https://openalex.org/W2011383762","https://openalex.org/W4327499987","https://openalex.org/W2048414027","https://openalex.org/W3173084154","https://openalex.org/W2940857995","https://openalex.org/W2940684586","https://openalex.org/W2809276897","https://openalex.org/W2031891814","https://openalex.org/W353876725","https://openalex.org/W2117086786"],"abstract_inverted_index":{"Compensation":[0],"for":[1,9,82,162],"channel":[2],"mismatch":[3],"and":[4,51,72,79,103,109,123,142,158,175],"noise":[5],"interference":[6],"is":[7,44,58,67],"essential":[8],"robust":[10],"automatic":[11],"speech":[12,15,49,52,66,71],"recognition.":[13,53],"Enhanced":[14],"has":[16],"been":[17],"introduced":[18],"into":[19,69,92],"the":[20,85,98,107,116,131,146,151,164,180,186,191],"multi-condition":[21],"training":[22,36],"of":[23,60,115,140,173],"acoustic":[24],"models":[25,161],"to":[26,46],"improve":[27],"their":[28],"generalization":[29],"ability.":[30],"In":[31,178],"this":[32],"paper,":[33],"a":[34,61,93,169],"noise-aware":[35],"framework":[37],"based":[38],"on":[39,145,190],"two":[40,132],"cascaded":[41],"neural":[42,120],"structures":[43],"proposed":[45,133,165,181],"jointly":[47],"optimize":[48],"enhancement":[50,56],"The":[54],"feature":[55],"module":[57,87],"composed":[59],"multi-task":[62],"autoencoder,":[63],"where":[64],"noisy":[65,80],"decomposed":[68],"clean":[70],"noise.":[73],"By":[74],"concatenating":[75],"its":[76,124],"enhanced,":[77],"noise-aware,":[78],"features":[81],"each":[83,89],"frame,":[84],"acoustic-modeling":[86],"maps":[88],"feature-augmented":[90],"frame":[91],"triphone":[94],"state":[95,111],"by":[96],"optimizing":[97],"lattice-free":[99],"maximum":[100],"mutual":[101],"information":[102],"cross":[104],"entropy":[105],"between":[106],"predicted":[108],"actual":[110],"sequences.":[112],"On":[113],"top":[114],"factorized":[117],"time":[118],"delay":[119],"network":[121],"(TDNN-F)":[122],"convolutional":[125],"variant":[126],"(CNN-TDNNF),":[127],"both":[128],"with":[129,150],"SpecAug,":[130],"systems":[134,154],"achieve":[135],"word":[136],"error":[137],"rate":[138],"(WER)":[139],"3.90%":[141],"3.55%,":[143],"respectively,":[144],"Aurora-4":[147],"task.":[148,193],"Compared":[149],"best":[152],"existing":[153],"that":[155],"use":[156],"bigram":[157],"trigram":[159],"language":[160],"decoding,":[163],"CNN-TDNNF-based":[166,182],"system":[167,183,189],"achieves":[168],"relative":[170],"WER":[171],"reduction":[172],"15.20%":[174],"33.53%,":[176],"respectively.":[177],"addition,":[179],"also":[184],"outperforms":[185],"baseline":[187],"CNN-TDNNF":[188],"AMI":[192]},"counts_by_year":[{"year":2025,"cited_by_count":2},{"year":2023,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
