{"id":"https://openalex.org/W3185108477","doi":"https://doi.org/10.21437/interspeech.2021-161","title":"On Minimum Word Error Rate Training of the Hybrid Autoregressive Transducer","display_name":"On Minimum Word Error Rate Training of the Hybrid Autoregressive Transducer","publication_year":2021,"publication_date":"2021-08-27","ids":{"openalex":"https://openalex.org/W3185108477","doi":"https://doi.org/10.21437/interspeech.2021-161","mag":"3185108477"},"language":"en","primary_location":{"id":"doi:10.21437/interspeech.2021-161","is_oa":false,"landing_page_url":"https://doi.org/10.21437/interspeech.2021-161","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Interspeech 2021","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5101607148","display_name":"Liang Lu","orcid":"https://orcid.org/0000-0003-4005-679X"},"institutions":[{"id":"https://openalex.org/I1290206253","display_name":"Microsoft (United States)","ror":"https://ror.org/00d0nc645","country_code":"US","type":"company","lineage":["https://openalex.org/I1290206253"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Liang Lu","raw_affiliation_strings":["Microsoft Corp., USA"],"affiliations":[{"raw_affiliation_string":"Microsoft Corp., USA","institution_ids":["https://openalex.org/I1290206253"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101749753","display_name":"Zhong Meng","orcid":"https://orcid.org/0000-0001-7814-5929"},"institutions":[{"id":"https://openalex.org/I1290206253","display_name":"Microsoft (United States)","ror":"https://ror.org/00d0nc645","country_code":"US","type":"company","lineage":["https://openalex.org/I1290206253"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Zhong Meng","raw_affiliation_strings":["Microsoft Corp., USA"],"affiliations":[{"raw_affiliation_string":"Microsoft Corp., USA","institution_ids":["https://openalex.org/I1290206253"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5016279564","display_name":"Naoyuki Kanda","orcid":"https://orcid.org/0000-0002-8628-3288"},"institutions":[{"id":"https://openalex.org/I1290206253","display_name":"Microsoft (United States)","ror":"https://ror.org/00d0nc645","country_code":"US","type":"company","lineage":["https://openalex.org/I1290206253"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Naoyuki Kanda","raw_affiliation_strings":["Microsoft Corp., USA"],"affiliations":[{"raw_affiliation_string":"Microsoft Corp., USA","institution_ids":["https://openalex.org/I1290206253"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100365053","display_name":"Jinyu Li","orcid":"https://orcid.org/0000-0002-1089-9748"},"institutions":[{"id":"https://openalex.org/I1290206253","display_name":"Microsoft (United States)","ror":"https://ror.org/00d0nc645","country_code":"US","type":"company","lineage":["https://openalex.org/I1290206253"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Jinyu Li","raw_affiliation_strings":["Microsoft Corp., USA"],"affiliations":[{"raw_affiliation_string":"Microsoft Corp., USA","institution_ids":["https://openalex.org/I1290206253"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5107639889","display_name":"Yifan Gong","orcid":null},"institutions":[{"id":"https://openalex.org/I1290206253","display_name":"Microsoft (United States)","ror":"https://ror.org/00d0nc645","country_code":"US","type":"company","lineage":["https://openalex.org/I1290206253"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Yifan Gong","raw_affiliation_strings":["Microsoft Corp., USA"],"affiliations":[{"raw_affiliation_string":"Microsoft Corp., USA","institution_ids":["https://openalex.org/I1290206253"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5101607148"],"corresponding_institution_ids":["https://openalex.org/I1290206253"],"apc_list":null,"apc_paid":null,"fwci":0.8158,"has_fulltext":false,"cited_by_count":6,"citation_normalized_percentile":{"value":0.7804011,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":98},"biblio":{"volume":null,"issue":null,"first_page":"3435","last_page":"3439"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9973000288009644,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9970999956130981,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/autoregressive-model","display_name":"Autoregressive model","score":0.7598303556442261},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6812098622322083},{"id":"https://openalex.org/keywords/decoding-methods","display_name":"Decoding methods","score":0.6657677888870239},{"id":"https://openalex.org/keywords/word-error-rate","display_name":"Word error rate","score":0.6581757068634033},{"id":"https://openalex.org/keywords/robustness","display_name":"Robustness (evolution)","score":0.6493041515350342},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.5620396137237549},{"id":"https://openalex.org/keywords/inference","display_name":"Inference","score":0.5031523108482361},{"id":"https://openalex.org/keywords/recurrent-neural-network","display_name":"Recurrent neural network","score":0.48145025968551636},{"id":"https://openalex.org/keywords/word","display_name":"Word (group theory)","score":0.4583991467952728},{"id":"https://openalex.org/keywords/hidden-markov-model","display_name":"Hidden Markov model","score":0.4446142315864563},{"id":"https://openalex.org/keywords/artificial-neural-network","display_name":"Artificial neural network","score":0.4297768771648407},{"id":"https://openalex.org/keywords/beam-search","display_name":"Beam search","score":0.42929643392562866},{"id":"https://openalex.org/keywords/language-model","display_name":"Language model","score":0.42272454500198364},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.40234440565109253},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.3789994418621063},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.3505627512931824},{"id":"https://openalex.org/keywords/statistics","display_name":"Statistics","score":0.21194851398468018},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.19859126210212708},{"id":"https://openalex.org/keywords/search-algorithm","display_name":"Search algorithm","score":0.13093647360801697}],"concepts":[{"id":"https://openalex.org/C159877910","wikidata":"https://www.wikidata.org/wiki/Q2202883","display_name":"Autoregressive model","level":2,"score":0.7598303556442261},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6812098622322083},{"id":"https://openalex.org/C57273362","wikidata":"https://www.wikidata.org/wiki/Q576722","display_name":"Decoding methods","level":2,"score":0.6657677888870239},{"id":"https://openalex.org/C40969351","wikidata":"https://www.wikidata.org/wiki/Q3516228","display_name":"Word error rate","level":2,"score":0.6581757068634033},{"id":"https://openalex.org/C63479239","wikidata":"https://www.wikidata.org/wiki/Q7353546","display_name":"Robustness (evolution)","level":3,"score":0.6493041515350342},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.5620396137237549},{"id":"https://openalex.org/C2776214188","wikidata":"https://www.wikidata.org/wiki/Q408386","display_name":"Inference","level":2,"score":0.5031523108482361},{"id":"https://openalex.org/C147168706","wikidata":"https://www.wikidata.org/wiki/Q1457734","display_name":"Recurrent neural network","level":3,"score":0.48145025968551636},{"id":"https://openalex.org/C90805587","wikidata":"https://www.wikidata.org/wiki/Q10944557","display_name":"Word (group theory)","level":2,"score":0.4583991467952728},{"id":"https://openalex.org/C23224414","wikidata":"https://www.wikidata.org/wiki/Q176769","display_name":"Hidden Markov model","level":2,"score":0.4446142315864563},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.4297768771648407},{"id":"https://openalex.org/C19889080","wikidata":"https://www.wikidata.org/wiki/Q2835852","display_name":"Beam search","level":3,"score":0.42929643392562866},{"id":"https://openalex.org/C137293760","wikidata":"https://www.wikidata.org/wiki/Q3621696","display_name":"Language model","level":2,"score":0.42272454500198364},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.40234440565109253},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.3789994418621063},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.3505627512931824},{"id":"https://openalex.org/C105795698","wikidata":"https://www.wikidata.org/wiki/Q12483","display_name":"Statistics","level":1,"score":0.21194851398468018},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.19859126210212708},{"id":"https://openalex.org/C125583679","wikidata":"https://www.wikidata.org/wiki/Q755673","display_name":"Search algorithm","level":2,"score":0.13093647360801697},{"id":"https://openalex.org/C104317684","wikidata":"https://www.wikidata.org/wiki/Q7187","display_name":"Gene","level":2,"score":0.0},{"id":"https://openalex.org/C185592680","wikidata":"https://www.wikidata.org/wiki/Q2329","display_name":"Chemistry","level":0,"score":0.0},{"id":"https://openalex.org/C2524010","wikidata":"https://www.wikidata.org/wiki/Q8087","display_name":"Geometry","level":1,"score":0.0},{"id":"https://openalex.org/C55493867","wikidata":"https://www.wikidata.org/wiki/Q7094","display_name":"Biochemistry","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.21437/interspeech.2021-161","is_oa":false,"landing_page_url":"https://doi.org/10.21437/interspeech.2021-161","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Interspeech 2021","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":20,"referenced_works":["https://openalex.org/W38527073","https://openalex.org/W854541894","https://openalex.org/W1828163288","https://openalex.org/W1915251500","https://openalex.org/W1922655562","https://openalex.org/W1942035323","https://openalex.org/W2131342762","https://openalex.org/W2150907703","https://openalex.org/W2327501763","https://openalex.org/W2403195671","https://openalex.org/W2515801922","https://openalex.org/W2577366047","https://openalex.org/W2888779557","https://openalex.org/W2962760690","https://openalex.org/W2962765220","https://openalex.org/W3008037978","https://openalex.org/W3016234571","https://openalex.org/W3094979069","https://openalex.org/W3095697114","https://openalex.org/W3096815019"],"related_works":["https://openalex.org/W2160451571","https://openalex.org/W4245698648","https://openalex.org/W2405257913","https://openalex.org/W3133710586","https://openalex.org/W2125964738","https://openalex.org/W2098529290","https://openalex.org/W2026402306","https://openalex.org/W2495256954","https://openalex.org/W2259317772","https://openalex.org/W2114916462"],"abstract_inverted_index":{"Hybrid":[0],"Autoregressive":[1],"Transducer":[2,18],"(HAT)":[3],"is":[4,96],"a":[5,47],"recently":[6],"proposed":[7],"end-to-end":[8,114],"acoustic":[9],"model":[10,27,71,153],"that":[11,95,133],"extends":[12],"the":[13,21,24,31,35,74,84,99,138,145,149,152,155],"standard":[14],"Recurrent":[15],"Neural":[16],"Network":[17],"(RNN-T)":[19],"for":[20,51,102],"purpose":[22],"of":[23,91,113,128,140,151],"external":[25,64],"language":[26],"(LM)":[28],"fusion.In":[29],"HAT,":[30],"blank":[32],"probability":[33,37,43],"and":[34,56,105,120,162],"label":[36],"are":[38],"estimated":[39],"using":[40],"two":[41],"separate":[42],"distributions,":[44],"which":[45],"provides":[46],"more":[48],"accurate":[49],"solution":[50],"internal":[52],"LM":[53],"score":[54],"estimation,":[55],"thus":[57],"works":[58],"better":[59],"when":[60],"combining":[61],"with":[62,73,124],"an":[63],"LM.Previous":[65],"work":[66],"mainly":[67],"focuses":[68],"on":[69],"HAT":[70,92,141],"training":[72,90,129,135],"negative":[75],"log-likelihood":[76],"loss,":[77],"while":[78,143],"in":[79],"this":[80],"paper,":[81],"we":[82,131],"study":[83],"minimum":[85],"word":[86],"error":[87],"rate":[88],"(MWER)":[89],"-a":[93],"criterion":[94],"closer":[97],"to":[98,110],"evaluation":[100],"metric":[101],"speech":[103],"recognition,":[104],"has":[106],"been":[107],"successfully":[108],"applied":[109],"other":[111],"types":[112],"models":[115],"such":[116,158],"as":[117,159],"sequenceto-sequence":[118],"(S2S)":[119],"RNN-T":[121],"models.From":[122],"experiments":[123],"around":[125],"30,000":[126],"hours":[127],"data,":[130],"show":[132],"MWER":[134],"can":[136],"improve":[137],"accuracy":[139],"models,":[142],"at":[144],"same":[146],"time,":[147],"improving":[148],"robustness":[150],"against":[154],"decoding":[156,163],"hyper-parameters":[157],"length":[160],"normalization":[161],"beam":[164],"during":[165],"inference.":[166]},"counts_by_year":[{"year":2022,"cited_by_count":5},{"year":2021,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
