{"id":"https://openalex.org/W2296167893","doi":"https://doi.org/10.1109/taslp.2016.2528171","title":"A Joint Training Framework for Robust Automatic Speech Recognition","display_name":"A Joint Training Framework for Robust Automatic Speech Recognition","publication_year":2016,"publication_date":"2016-02-10","ids":{"openalex":"https://openalex.org/W2296167893","doi":"https://doi.org/10.1109/taslp.2016.2528171","mag":"2296167893"},"language":"en","primary_location":{"id":"doi:10.1109/taslp.2016.2528171","is_oa":false,"landing_page_url":"https://doi.org/10.1109/taslp.2016.2528171","pdf_url":null,"source":{"id":"https://openalex.org/S4210169297","display_name":"IEEE/ACM Transactions on Audio Speech and Language Processing","issn_l":"2329-9290","issn":["2329-9290","2329-9304"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE/ACM Transactions on Audio, Speech, and Language Processing","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5101607498","display_name":"Zhong-Qiu Wang","orcid":"https://orcid.org/0000-0002-4204-9430"},"institutions":[{"id":"https://openalex.org/I52357470","display_name":"The Ohio State University","ror":"https://ror.org/00rs6vg23","country_code":"US","type":"education","lineage":["https://openalex.org/I52357470"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Zhong-Qiu Wang","raw_affiliation_strings":["Department of Computer Science and Engineering, The Ohio State University, Columbus, OH, USA"],"affiliations":[{"raw_affiliation_string":"Department of Computer Science and Engineering, The Ohio State University, Columbus, OH, USA","institution_ids":["https://openalex.org/I52357470"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5051837453","display_name":"DeLiang Wang","orcid":"https://orcid.org/0000-0001-8195-6319"},"institutions":[{"id":"https://openalex.org/I52357470","display_name":"The Ohio State University","ror":"https://ror.org/00rs6vg23","country_code":"US","type":"education","lineage":["https://openalex.org/I52357470"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"DeLiang Wang","raw_affiliation_strings":["Center for Cognitive and Brain Sciences, The Ohio State University, Columbus, OH, USA"],"affiliations":[{"raw_affiliation_string":"Center for Cognitive and Brain Sciences, The Ohio State University, Columbus, OH, USA","institution_ids":["https://openalex.org/I52357470"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5101607498"],"corresponding_institution_ids":["https://openalex.org/I52357470"],"apc_list":null,"apc_paid":null,"fwci":16.1396,"has_fulltext":false,"cited_by_count":153,"citation_normalized_percentile":{"value":0.99374777,"is_in_top_1_percent":true,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":90,"max":100},"biblio":{"volume":"24","issue":"4","first_page":"796","last_page":"806"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9968000054359436,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8005741834640503},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.7518800497055054},{"id":"https://openalex.org/keywords/robustness","display_name":"Robustness (evolution)","score":0.7336512804031372},{"id":"https://openalex.org/keywords/discriminative-model","display_name":"Discriminative model","score":0.7244419455528259},{"id":"https://openalex.org/keywords/word-error-rate","display_name":"Word error rate","score":0.6341338157653809},{"id":"https://openalex.org/keywords/artificial-neural-network","display_name":"Artificial neural network","score":0.564125657081604},{"id":"https://openalex.org/keywords/reverberation","display_name":"Reverberation","score":0.5202699899673462},{"id":"https://openalex.org/keywords/source-separation","display_name":"Source separation","score":0.5016655921936035},{"id":"https://openalex.org/keywords/acoustic-model","display_name":"Acoustic model","score":0.4807000756263733},{"id":"https://openalex.org/keywords/test-set","display_name":"Test set","score":0.45141035318374634},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.42954716086387634},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.3387182950973511},{"id":"https://openalex.org/keywords/speech-processing","display_name":"Speech processing","score":0.29671594500541687},{"id":"https://openalex.org/keywords/engineering","display_name":"Engineering","score":0.09089729189872742}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8005741834640503},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.7518800497055054},{"id":"https://openalex.org/C63479239","wikidata":"https://www.wikidata.org/wiki/Q7353546","display_name":"Robustness (evolution)","level":3,"score":0.7336512804031372},{"id":"https://openalex.org/C97931131","wikidata":"https://www.wikidata.org/wiki/Q5282087","display_name":"Discriminative model","level":2,"score":0.7244419455528259},{"id":"https://openalex.org/C40969351","wikidata":"https://www.wikidata.org/wiki/Q3516228","display_name":"Word error rate","level":2,"score":0.6341338157653809},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.564125657081604},{"id":"https://openalex.org/C95851461","wikidata":"https://www.wikidata.org/wiki/Q468809","display_name":"Reverberation","level":2,"score":0.5202699899673462},{"id":"https://openalex.org/C2776864781","wikidata":"https://www.wikidata.org/wiki/Q52617913","display_name":"Source separation","level":2,"score":0.5016655921936035},{"id":"https://openalex.org/C155635449","wikidata":"https://www.wikidata.org/wiki/Q4674699","display_name":"Acoustic model","level":3,"score":0.4807000756263733},{"id":"https://openalex.org/C169903167","wikidata":"https://www.wikidata.org/wiki/Q3985153","display_name":"Test set","level":2,"score":0.45141035318374634},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.42954716086387634},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.3387182950973511},{"id":"https://openalex.org/C61328038","wikidata":"https://www.wikidata.org/wiki/Q3358061","display_name":"Speech processing","level":2,"score":0.29671594500541687},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.09089729189872742},{"id":"https://openalex.org/C55493867","wikidata":"https://www.wikidata.org/wiki/Q7094","display_name":"Biochemistry","level":1,"score":0.0},{"id":"https://openalex.org/C185592680","wikidata":"https://www.wikidata.org/wiki/Q2329","display_name":"Chemistry","level":0,"score":0.0},{"id":"https://openalex.org/C119599485","wikidata":"https://www.wikidata.org/wiki/Q43035","display_name":"Electrical engineering","level":1,"score":0.0},{"id":"https://openalex.org/C104317684","wikidata":"https://www.wikidata.org/wiki/Q7187","display_name":"Gene","level":2,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/taslp.2016.2528171","is_oa":false,"landing_page_url":"https://doi.org/10.1109/taslp.2016.2528171","pdf_url":null,"source":{"id":"https://openalex.org/S4210169297","display_name":"IEEE/ACM Transactions on Audio Speech and Language Processing","issn_l":"2329-9290","issn":["2329-9290","2329-9304"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE/ACM Transactions on Audio, Speech, and Language Processing","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/10","score":0.7099999785423279,"display_name":"Reduced inequalities"}],"awards":[{"id":"https://openalex.org/G8822322065","display_name":null,"funder_award_id":"FA9550-12-1-0130","funder_id":"https://openalex.org/F4320338279","funder_display_name":"Air Force Office of Scientific Research"},{"id":"https://openalex.org/G8888347223","display_name":null,"funder_award_id":"IIS-1409431","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"}],"funders":[{"id":"https://openalex.org/F4320306076","display_name":"National Science Foundation","ror":"https://ror.org/021nxhr62"},{"id":"https://openalex.org/F4320338279","display_name":"Air Force Office of Scientific Research","ror":"https://ror.org/011e9bt93"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":75,"referenced_works":["https://openalex.org/W160800111","https://openalex.org/W1482149378","https://openalex.org/W1524333225","https://openalex.org/W1583048384","https://openalex.org/W1598508708","https://openalex.org/W1897240248","https://openalex.org/W1922655562","https://openalex.org/W1969851134","https://openalex.org/W1979482308","https://openalex.org/W1989364685","https://openalex.org/W1989549063","https://openalex.org/W1992475611","https://openalex.org/W2006129368","https://openalex.org/W2009934439","https://openalex.org/W2015337779","https://openalex.org/W2017608047","https://openalex.org/W2033310064","https://openalex.org/W2035576074","https://openalex.org/W2041638389","https://openalex.org/W2042141988","https://openalex.org/W2044893557","https://openalex.org/W2046869671","https://openalex.org/W2057200980","https://openalex.org/W2057826000","https://openalex.org/W2062164080","https://openalex.org/W2063224314","https://openalex.org/W2064675550","https://openalex.org/W2069681747","https://openalex.org/W2079362249","https://openalex.org/W2079623482","https://openalex.org/W2114016253","https://openalex.org/W2114719288","https://openalex.org/W2131342762","https://openalex.org/W2137075158","https://openalex.org/W2141411743","https://openalex.org/W2143612262","https://openalex.org/W2146502635","https://openalex.org/W2148575186","https://openalex.org/W2149600041","https://openalex.org/W2156387975","https://openalex.org/W2160306971","https://openalex.org/W2160815625","https://openalex.org/W2168379380","https://openalex.org/W2187519021","https://openalex.org/W2213952365","https://openalex.org/W2291652038","https://openalex.org/W2394967684","https://openalex.org/W2396918387","https://openalex.org/W2397728357","https://openalex.org/W2398264106","https://openalex.org/W2399557756","https://openalex.org/W2402146185","https://openalex.org/W2403553999","https://openalex.org/W2404019834","https://openalex.org/W2408713104","https://openalex.org/W2561557072","https://openalex.org/W2626775259","https://openalex.org/W2755891984","https://openalex.org/W4233392025","https://openalex.org/W6631362777","https://openalex.org/W6640090968","https://openalex.org/W6681435938","https://openalex.org/W6682889407","https://openalex.org/W6687075232","https://openalex.org/W6711908631","https://openalex.org/W6712249501","https://openalex.org/W6712395177","https://openalex.org/W6712476441","https://openalex.org/W6712706723","https://openalex.org/W6712930963","https://openalex.org/W6712950515","https://openalex.org/W6713597360","https://openalex.org/W6713729801","https://openalex.org/W6739898779","https://openalex.org/W6744261651"],"related_works":["https://openalex.org/W2145230572","https://openalex.org/W2166312020","https://openalex.org/W2150750161","https://openalex.org/W4384389756","https://openalex.org/W151018310","https://openalex.org/W3081187864","https://openalex.org/W4380605396","https://openalex.org/W3133352777","https://openalex.org/W3136989387","https://openalex.org/W2008737763"],"abstract_inverted_index":{"Robustness":[0],"against":[1],"noise":[2],"and":[3,41,58,69,93,127,151,207,220],"reverberation":[4],"is":[5,19,46,82,164],"critical":[6],"for":[7,38,154],"ASR":[8],"systems":[9],"deployed":[10],"in":[11,74,124],"real-world":[12],"environments.":[13],"In":[14,107],"robust":[15],"ASR,":[16],"corrupted":[17],"speech":[18,23,39,55,87],"normally":[20],"enhanced":[21,86],"using":[22],"separation":[24,40,56,80,100,136,182],"or":[25],"enhancement":[26],"algorithms":[27],"before":[28],"recognition.":[29,42],"This":[30,77],"paper":[31],"presents":[32],"a":[33,49,59,65,174,221],"novel":[34],"joint":[35],"training":[36,112,140],"framework":[37],"The":[43,184],"key":[44],"idea":[45],"to":[47,63,84,102,113,133,166],"concatenate":[48],"deep":[50],"neural":[51,67],"network":[52,171],"(DNN)":[53],"based":[54],"frontend":[57,81,101,137],"DNN-based":[60],"acoustic":[61,91,95,126,155],"model":[62,92,96],"build":[64],"larger":[66],"network,":[68],"jointly":[70,115,169],"adjust":[71],"the":[72,79,90,94,99,114,120,125,135,139,145,158,168,178,181,201,205,214,226],"weights":[73],"each":[75],"module.":[76],"way,":[78],"able":[83],"provide":[85],"desired":[88],"by":[89,172],"can":[97,130],"guide":[98],"produce":[103],"more":[104,149],"discriminative":[105],"enhancement.":[106],"addition,":[108],"we":[109,147],"apply":[110],"sequence":[111],"trained":[116,170],"DNN":[117],"so":[118],"that":[119],"linguistic":[121],"information":[122],"contained":[123],"language":[128],"models":[129],"be":[131],"back-propagated":[132],"influence":[134],"at":[138],"stage.":[141],"To":[142],"further":[143],"improve":[144],"robustness,":[146],"add":[148],"noise-":[150],"reverberation-robust":[152],"features":[153],"modeling.":[156],"At":[157],"test":[159,202],"stage,":[160],"utterance-level":[161],"unsupervised":[162],"adaptation":[163,192],"performed":[165],"adapt":[167],"learning":[173],"linear":[175],"transformation":[176],"of":[177,180,204],"input":[179],"frontend.":[183],"resulting":[185],"sequence-discriminative":[186],"jointly-trained":[187],"multistream":[188],"system":[189],"with":[190],"run-time":[191],"achieves":[193],"10.63%":[194],"average":[195],"word":[196],"error":[197,223],"rate":[198],"(WER)":[199],"on":[200,217],"set":[203],"reverberant":[206],"noisy":[208],"CHiME-2":[209],"dataset":[210,219],"(task-2),":[211],"which":[212],"represents":[213],"best":[215,227],"performance":[216],"this":[218],"22.75%":[222],"reduction":[224],"over":[225],"existing":[228],"method.":[229]},"counts_by_year":[{"year":2026,"cited_by_count":2},{"year":2025,"cited_by_count":10},{"year":2024,"cited_by_count":11},{"year":2023,"cited_by_count":21},{"year":2022,"cited_by_count":6},{"year":2021,"cited_by_count":19},{"year":2020,"cited_by_count":20},{"year":2019,"cited_by_count":28},{"year":2018,"cited_by_count":18},{"year":2017,"cited_by_count":17},{"year":2016,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
