{"id":"https://openalex.org/W2057826000","doi":"https://doi.org/10.1109/taslp.2014.2372314","title":"Improving robustness of deep neural network acoustic models via speech separation and joint adaptive training","display_name":"Improving robustness of deep neural network acoustic models via speech separation and joint adaptive training","publication_year":2014,"publication_date":"2014-01-01","ids":{"openalex":"https://openalex.org/W2057826000","doi":"https://doi.org/10.1109/taslp.2014.2372314","mag":"2057826000","pmid":"https://pubmed.ncbi.nlm.nih.gov/26973851"},"language":"en","primary_location":{"id":"doi:10.1109/taslp.2014.2372314","is_oa":false,"landing_page_url":"https://doi.org/10.1109/taslp.2014.2372314","pdf_url":null,"source":{"id":"https://openalex.org/S4210169297","display_name":"IEEE/ACM Transactions on Audio Speech and Language Processing","issn_l":"2329-9290","issn":["2329-9290","2329-9304"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE/ACM Transactions on Audio, Speech, and Language Processing","raw_type":"journal-article"},"type":"article","indexed_in":["crossref","pubmed"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://www.ncbi.nlm.nih.gov/pmc/articles/4784988","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5000078382","display_name":"Arun Narayanan","orcid":"https://orcid.org/0009-0008-3325-8928"},"institutions":[{"id":"https://openalex.org/I52357470","display_name":"The Ohio State University","ror":"https://ror.org/00rs6vg23","country_code":"US","type":"education","lineage":["https://openalex.org/I52357470"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Arun Narayanan","raw_affiliation_strings":["Department of Computer Science and Engineering, The Ohio State University, Columbus, OH 43210 USA. He is now with Google, Inc., Mountain View, CA 94043 USA ( narayaar@cse.ohio-state.edu )","Google, Inc., Mountain View, CA and The Department of Computer Science and Engineering, The Ohio State University, Columbus, OH#TAB#"],"affiliations":[{"raw_affiliation_string":"Department of Computer Science and Engineering, The Ohio State University, Columbus, OH 43210 USA. He is now with Google, Inc., Mountain View, CA 94043 USA ( narayaar@cse.ohio-state.edu )","institution_ids":[]},{"raw_affiliation_string":"Google, Inc., Mountain View, CA and The Department of Computer Science and Engineering, The Ohio State University, Columbus, OH#TAB#","institution_ids":["https://openalex.org/I52357470"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5051837453","display_name":"DeLiang Wang","orcid":"https://orcid.org/0000-0001-8195-6319"},"institutions":[{"id":"https://openalex.org/I52357470","display_name":"The Ohio State University","ror":"https://ror.org/00rs6vg23","country_code":"US","type":"education","lineage":["https://openalex.org/I52357470"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"DeLiang Wang","raw_affiliation_strings":["Department of Computer Science and Engineering and Center for Cognitive and Brain Sciences, The Ohio State University, Columbus, OH 43210 USA ( dwang@cse.ohio-state.edu )","[Department of Computer Science and Engineering & Center for Cognitive and Brain Sciences, The Ohio State University, Columbus, OH, USA]"],"affiliations":[{"raw_affiliation_string":"Department of Computer Science and Engineering and Center for Cognitive and Brain Sciences, The Ohio State University, Columbus, OH 43210 USA ( dwang@cse.ohio-state.edu )","institution_ids":[]},{"raw_affiliation_string":"[Department of Computer Science and Engineering & Center for Cognitive and Brain Sciences, The Ohio State University, Columbus, OH, USA]","institution_ids":["https://openalex.org/I52357470"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5000078382"],"corresponding_institution_ids":["https://openalex.org/I52357470"],"apc_list":null,"apc_paid":null,"fwci":6.7866,"has_fulltext":false,"cited_by_count":65,"citation_normalized_percentile":{"value":0.97525573,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":94,"max":100},"biblio":{"volume":"23","issue":"1","first_page":"1","last_page":"1"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9995999932289124,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7486644983291626},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.6550973057746887},{"id":"https://openalex.org/keywords/word-error-rate","display_name":"Word error rate","score":0.6227134466171265},{"id":"https://openalex.org/keywords/acoustic-model","display_name":"Acoustic model","score":0.568523645401001},{"id":"https://openalex.org/keywords/robustness","display_name":"Robustness (evolution)","score":0.5641106963157654},{"id":"https://openalex.org/keywords/artificial-neural-network","display_name":"Artificial neural network","score":0.5458709001541138},{"id":"https://openalex.org/keywords/masking","display_name":"Masking (illustration)","score":0.4797831177711487},{"id":"https://openalex.org/keywords/vocabulary","display_name":"Vocabulary","score":0.4668805003166199},{"id":"https://openalex.org/keywords/noise","display_name":"Noise (video)","score":0.4349236786365509},{"id":"https://openalex.org/keywords/source-separation","display_name":"Source separation","score":0.42695170640945435},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.40590354800224304},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.38498711585998535},{"id":"https://openalex.org/keywords/speech-processing","display_name":"Speech processing","score":0.28867483139038086}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7486644983291626},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.6550973057746887},{"id":"https://openalex.org/C40969351","wikidata":"https://www.wikidata.org/wiki/Q3516228","display_name":"Word error rate","level":2,"score":0.6227134466171265},{"id":"https://openalex.org/C155635449","wikidata":"https://www.wikidata.org/wiki/Q4674699","display_name":"Acoustic model","level":3,"score":0.568523645401001},{"id":"https://openalex.org/C63479239","wikidata":"https://www.wikidata.org/wiki/Q7353546","display_name":"Robustness (evolution)","level":3,"score":0.5641106963157654},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.5458709001541138},{"id":"https://openalex.org/C2777402240","wikidata":"https://www.wikidata.org/wiki/Q6783436","display_name":"Masking (illustration)","level":2,"score":0.4797831177711487},{"id":"https://openalex.org/C2777601683","wikidata":"https://www.wikidata.org/wiki/Q6499736","display_name":"Vocabulary","level":2,"score":0.4668805003166199},{"id":"https://openalex.org/C99498987","wikidata":"https://www.wikidata.org/wiki/Q2210247","display_name":"Noise (video)","level":3,"score":0.4349236786365509},{"id":"https://openalex.org/C2776864781","wikidata":"https://www.wikidata.org/wiki/Q52617913","display_name":"Source separation","level":2,"score":0.42695170640945435},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.40590354800224304},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.38498711585998535},{"id":"https://openalex.org/C61328038","wikidata":"https://www.wikidata.org/wiki/Q3358061","display_name":"Speech processing","level":2,"score":0.28867483139038086},{"id":"https://openalex.org/C104317684","wikidata":"https://www.wikidata.org/wiki/Q7187","display_name":"Gene","level":2,"score":0.0},{"id":"https://openalex.org/C153349607","wikidata":"https://www.wikidata.org/wiki/Q36649","display_name":"Visual arts","level":1,"score":0.0},{"id":"https://openalex.org/C185592680","wikidata":"https://www.wikidata.org/wiki/Q2329","display_name":"Chemistry","level":0,"score":0.0},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.0},{"id":"https://openalex.org/C55493867","wikidata":"https://www.wikidata.org/wiki/Q7094","display_name":"Biochemistry","level":1,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.0},{"id":"https://openalex.org/C142362112","wikidata":"https://www.wikidata.org/wiki/Q735","display_name":"Art","level":0,"score":0.0}],"mesh":[],"locations_count":3,"locations":[{"id":"doi:10.1109/taslp.2014.2372314","is_oa":false,"landing_page_url":"https://doi.org/10.1109/taslp.2014.2372314","pdf_url":null,"source":{"id":"https://openalex.org/S4210169297","display_name":"IEEE/ACM Transactions on Audio Speech and Language Processing","issn_l":"2329-9290","issn":["2329-9290","2329-9304"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE/ACM Transactions on Audio, Speech, and Language Processing","raw_type":"journal-article"},{"id":"pmid:26973851","is_oa":false,"landing_page_url":"https://pubmed.ncbi.nlm.nih.gov/26973851","pdf_url":null,"source":{"id":"https://openalex.org/S4306525036","display_name":"PubMed","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1299303238","host_organization_name":"National Institutes of Health","host_organization_lineage":["https://openalex.org/I1299303238"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE/ACM transactions on audio, speech, and language processing","raw_type":null},{"id":"pmh:oai:pubmedcentral.nih.gov:4784988","is_oa":true,"landing_page_url":"https://www.ncbi.nlm.nih.gov/pmc/articles/4784988","pdf_url":null,"source":{"id":"https://openalex.org/S2764455111","display_name":"PubMed Central","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1299303238","host_organization_name":"National Institutes of Health","host_organization_lineage":["https://openalex.org/I1299303238"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"IEEE/ACM Trans Audio Speech Lang Process","raw_type":"Text"}],"best_oa_location":{"id":"pmh:oai:pubmedcentral.nih.gov:4784988","is_oa":true,"landing_page_url":"https://www.ncbi.nlm.nih.gov/pmc/articles/4784988","pdf_url":null,"source":{"id":"https://openalex.org/S2764455111","display_name":"PubMed Central","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1299303238","host_organization_name":"National Institutes of Health","host_organization_lineage":["https://openalex.org/I1299303238"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"IEEE/ACM Trans Audio Speech Lang Process","raw_type":"Text"},"sustainable_development_goals":[{"display_name":"Quality Education","score":0.47999998927116394,"id":"https://metadata.un.org/sdg/4"}],"awards":[{"id":"https://openalex.org/G2910152985","display_name":null,"funder_award_id":"R01 DC012048","funder_id":"https://openalex.org/F4320337352","funder_display_name":"National Institute on Deafness and Other Communication Disorders"},{"id":"https://openalex.org/G4907796938","display_name":null,"funder_award_id":"FA9550- 12-1-0130","funder_id":"https://openalex.org/F4320338279","funder_display_name":"Air Force Office of Scientific Research"}],"funders":[{"id":"https://openalex.org/F4320337352","display_name":"National Institute on Deafness and Other Communication Disorders","ror":"https://ror.org/04mhx6838"},{"id":"https://openalex.org/F4320338279","display_name":"Air Force Office of Scientific Research","ror":"https://ror.org/011e9bt93"}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":55,"referenced_works":["https://openalex.org/W71792008","https://openalex.org/W88081813","https://openalex.org/W1524333225","https://openalex.org/W1599512239","https://openalex.org/W1665214252","https://openalex.org/W1904365287","https://openalex.org/W1992475611","https://openalex.org/W1993409002","https://openalex.org/W1993882792","https://openalex.org/W1999736059","https://openalex.org/W2008971146","https://openalex.org/W2009934439","https://openalex.org/W2010362084","https://openalex.org/W2031647436","https://openalex.org/W2035576074","https://openalex.org/W2041638389","https://openalex.org/W2042141988","https://openalex.org/W2047919706","https://openalex.org/W2052667477","https://openalex.org/W2062164080","https://openalex.org/W2063224314","https://openalex.org/W2067117291","https://openalex.org/W2069681747","https://openalex.org/W2071310251","https://openalex.org/W2073612610","https://openalex.org/W2078528584","https://openalex.org/W2086139506","https://openalex.org/W2087126002","https://openalex.org/W2114016253","https://openalex.org/W2122009793","https://openalex.org/W2124149378","https://openalex.org/W2131342762","https://openalex.org/W2137075158","https://openalex.org/W2140595311","https://openalex.org/W2141411743","https://openalex.org/W2146502635","https://openalex.org/W2147768505","https://openalex.org/W2155445312","https://openalex.org/W2160815625","https://openalex.org/W2168379380","https://openalex.org/W2184045248","https://openalex.org/W2213952365","https://openalex.org/W2290318471","https://openalex.org/W2328757576","https://openalex.org/W2394967684","https://openalex.org/W2748434850","https://openalex.org/W2964138484","https://openalex.org/W6631362777","https://openalex.org/W6637242042","https://openalex.org/W6640036494","https://openalex.org/W6679429981","https://openalex.org/W6681435938","https://openalex.org/W6688428952","https://openalex.org/W6702079475","https://openalex.org/W6711908631"],"related_works":["https://openalex.org/W3081187864","https://openalex.org/W4380605396","https://openalex.org/W2803306015","https://openalex.org/W3133352777","https://openalex.org/W151018310","https://openalex.org/W2784059283","https://openalex.org/W4319779560","https://openalex.org/W2008737763","https://openalex.org/W4385611764","https://openalex.org/W2519224033"],"abstract_inverted_index":{"Although":[0],"deep":[1],"neural":[2],"network":[3,122],"(DNN)":[4],"acoustic":[5,90,100,141],"models":[6],"are":[7,105],"known":[8],"to":[9,38,139,157,178],"be":[10],"inherently":[11],"noise":[12,62,187],"robust,":[13],"especially":[14],"with":[15,117,132,174],"matched":[16],"training":[17],"and":[18,29,89,102,120,131,188,196],"testing":[19],"data,":[20],"the":[21,72,97,125,140,158,163,180,186,193,197,223],"use":[22],"of":[23,213,218],"speech":[24,49,56,103,189],"separation":[25,50,67,88,104,194],"as":[26,137],"a":[27,47,84,209],"frontend":[28,148],"for":[30,99,202],"deriving":[31],"alternative":[32,175],"feature":[33,176,199],"representations":[34,177],"has":[35],"been":[36],"shown":[37],"improve":[39],"performance":[40,59,168],"in":[41,60],"challenging":[42],"environments.":[43],"We":[44,81,171],"first":[45],"present":[46],"supervised":[48],"system":[51,65,166,207],"that":[52,86],"significantly":[53],"improves":[54,149,167],"automatic":[55],"recognition":[57],"(ASR)":[58],"realistic":[61],"conditions.":[63],"The":[64],"performs":[66],"via":[68,92],"ratio":[69,74,146],"time-frequency":[70],"masking;":[71],"ideal":[73],"mask":[75],"(IRM)":[76],"is":[77,110],"estimated":[78],"using":[79,107],"DNNs.":[80],"then":[82],"propose":[83],"framework":[85],"unifies":[87],"modeling":[91,101],"joint":[93],"adaptive":[94],"training.":[95],"Since":[96],"modules":[98],"implemented":[106],"DNNs,":[108],"unification":[109],"done":[111],"by":[112,153,169],"introducing":[113],"additional":[114],"hidden":[115],"layers":[116],"fixed":[118],"weights":[119],"appropriate":[121],"architecture.":[123],"On":[124],"CHiME-2":[126],"medium-large":[127],"vocabulary":[128],"ASR":[129],"task,":[130],"log":[133,182],"mel":[134,183],"spectral":[135],"features":[136],"input":[138],"model,":[142],"an":[143,216],"independently":[144],"trained":[145,165],"masking":[147],"word":[150,210],"error":[151,211],"rates":[152],"10.9%":[154],"(relative)":[155],"compared":[156],"noisy":[159],"baseline.":[160],"In":[161],"comparison,":[162],"jointly":[164],"14.4%.":[170],"also":[172],"experiment":[173],"augment":[179],"standard":[181,198],"features,":[184],"like":[185],"estimates":[190],"obtained":[191],"from":[192],"module,":[195],"set":[200],"used":[201],"IRM":[203],"estimation.":[204],"Our":[205],"best":[206,225],"obtains":[208],"rate":[212],"15.4%":[214],"(absolute),":[215],"improvement":[217],"4.6":[219],"percentage":[220],"points":[221],"over":[222],"next":[224],"result":[226],"on":[227],"this":[228],"corpus.":[229]},"counts_by_year":[{"year":2025,"cited_by_count":2},{"year":2024,"cited_by_count":2},{"year":2022,"cited_by_count":4},{"year":2021,"cited_by_count":8},{"year":2020,"cited_by_count":7},{"year":2019,"cited_by_count":6},{"year":2018,"cited_by_count":13},{"year":2017,"cited_by_count":7},{"year":2016,"cited_by_count":9},{"year":2015,"cited_by_count":7}],"updated_date":"2026-03-20T23:20:44.827607","created_date":"2025-10-10T00:00:00"}
