{"id":"https://openalex.org/W4210463634","doi":"https://doi.org/10.1109/asru51503.2021.9687871","title":"Scaling End-to-End Models for Large-Scale Multilingual ASR","display_name":"Scaling End-to-End Models for Large-Scale Multilingual ASR","publication_year":2021,"publication_date":"2021-12-13","ids":{"openalex":"https://openalex.org/W4210463634","doi":"https://doi.org/10.1109/asru51503.2021.9687871"},"language":"en","primary_location":{"id":"doi:10.1109/asru51503.2021.9687871","is_oa":false,"landing_page_url":"https://doi.org/10.1109/asru51503.2021.9687871","pdf_url":null,"source":{"id":"https://openalex.org/S4363606113","display_name":"2021 IEEE Automatic Speech Recognition and Understanding Workshop (ASRU)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2021 IEEE Automatic Speech Recognition and Understanding Workshop (ASRU)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5100677409","display_name":"Bo Li","orcid":"https://orcid.org/0000-0003-4883-7267"},"institutions":[{"id":"https://openalex.org/I1291425158","display_name":"Google (United States)","ror":"https://ror.org/00njsd438","country_code":"US","type":"company","lineage":["https://openalex.org/I1291425158","https://openalex.org/I4210128969"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Bo Li","raw_affiliation_strings":["Google,USA","Google, USA"],"affiliations":[{"raw_affiliation_string":"Google,USA","institution_ids":["https://openalex.org/I1291425158"]},{"raw_affiliation_string":"Google, USA","institution_ids":["https://openalex.org/I1291425158"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5112901893","display_name":"Ruoming Pang","orcid":null},"institutions":[{"id":"https://openalex.org/I1291425158","display_name":"Google (United States)","ror":"https://ror.org/00njsd438","country_code":"US","type":"company","lineage":["https://openalex.org/I1291425158","https://openalex.org/I4210128969"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Ruoming Pang","raw_affiliation_strings":["Google,USA","Google, USA"],"affiliations":[{"raw_affiliation_string":"Google,USA","institution_ids":["https://openalex.org/I1291425158"]},{"raw_affiliation_string":"Google, USA","institution_ids":["https://openalex.org/I1291425158"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5070513394","display_name":"Tara N. Sainath","orcid":"https://orcid.org/0000-0002-4126-6556"},"institutions":[{"id":"https://openalex.org/I1291425158","display_name":"Google (United States)","ror":"https://ror.org/00njsd438","country_code":"US","type":"company","lineage":["https://openalex.org/I1291425158","https://openalex.org/I4210128969"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Tara N. Sainath","raw_affiliation_strings":["Google,USA","Google, USA"],"affiliations":[{"raw_affiliation_string":"Google,USA","institution_ids":["https://openalex.org/I1291425158"]},{"raw_affiliation_string":"Google, USA","institution_ids":["https://openalex.org/I1291425158"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5070108948","display_name":"Anmol Gulati","orcid":"https://orcid.org/0009-0007-2077-9583"},"institutions":[{"id":"https://openalex.org/I1291425158","display_name":"Google (United States)","ror":"https://ror.org/00njsd438","country_code":"US","type":"company","lineage":["https://openalex.org/I1291425158","https://openalex.org/I4210128969"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Anmol Gulati","raw_affiliation_strings":["Google,USA","Google, USA"],"affiliations":[{"raw_affiliation_string":"Google,USA","institution_ids":["https://openalex.org/I1291425158"]},{"raw_affiliation_string":"Google, USA","institution_ids":["https://openalex.org/I1291425158"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100619178","display_name":"Zhang Yu","orcid":"https://orcid.org/0000-0003-2012-226X"},"institutions":[{"id":"https://openalex.org/I1291425158","display_name":"Google (United States)","ror":"https://ror.org/00njsd438","country_code":"US","type":"company","lineage":["https://openalex.org/I1291425158","https://openalex.org/I4210128969"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Yu Zhang","raw_affiliation_strings":["Google,USA","Google, USA"],"affiliations":[{"raw_affiliation_string":"Google,USA","institution_ids":["https://openalex.org/I1291425158"]},{"raw_affiliation_string":"Google, USA","institution_ids":["https://openalex.org/I1291425158"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5048771433","display_name":"James Qin","orcid":null},"institutions":[{"id":"https://openalex.org/I1291425158","display_name":"Google (United States)","ror":"https://ror.org/00njsd438","country_code":"US","type":"company","lineage":["https://openalex.org/I1291425158","https://openalex.org/I4210128969"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"James Qin","raw_affiliation_strings":["Google,USA","Google, USA"],"affiliations":[{"raw_affiliation_string":"Google,USA","institution_ids":["https://openalex.org/I1291425158"]},{"raw_affiliation_string":"Google, USA","institution_ids":["https://openalex.org/I1291425158"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5037705635","display_name":"Parisa Haghani","orcid":null},"institutions":[{"id":"https://openalex.org/I1291425158","display_name":"Google (United States)","ror":"https://ror.org/00njsd438","country_code":"US","type":"company","lineage":["https://openalex.org/I1291425158","https://openalex.org/I4210128969"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Parisa Haghani","raw_affiliation_strings":["Google,USA","Google, USA"],"affiliations":[{"raw_affiliation_string":"Google,USA","institution_ids":["https://openalex.org/I1291425158"]},{"raw_affiliation_string":"Google, USA","institution_ids":["https://openalex.org/I1291425158"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101399361","display_name":"Wei Huang","orcid":"https://orcid.org/0000-0002-4817-8858"},"institutions":[{"id":"https://openalex.org/I1291425158","display_name":"Google (United States)","ror":"https://ror.org/00njsd438","country_code":"US","type":"company","lineage":["https://openalex.org/I1291425158","https://openalex.org/I4210128969"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"W. Ronny Huang","raw_affiliation_strings":["Google,USA","Google, USA"],"affiliations":[{"raw_affiliation_string":"Google,USA","institution_ids":["https://openalex.org/I1291425158"]},{"raw_affiliation_string":"Google, USA","institution_ids":["https://openalex.org/I1291425158"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101706633","display_name":"Min Ma","orcid":"https://orcid.org/0000-0002-3132-4898"},"institutions":[{"id":"https://openalex.org/I1291425158","display_name":"Google (United States)","ror":"https://ror.org/00njsd438","country_code":"US","type":"company","lineage":["https://openalex.org/I1291425158","https://openalex.org/I4210128969"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Min Ma","raw_affiliation_strings":["Google,USA","Google, USA"],"affiliations":[{"raw_affiliation_string":"Google,USA","institution_ids":["https://openalex.org/I1291425158"]},{"raw_affiliation_string":"Google, USA","institution_ids":["https://openalex.org/I1291425158"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5002187986","display_name":"Junwen Bai","orcid":"https://orcid.org/0000-0001-7939-4927"},"institutions":[{"id":"https://openalex.org/I1291425158","display_name":"Google (United States)","ror":"https://ror.org/00njsd438","country_code":"US","type":"company","lineage":["https://openalex.org/I1291425158","https://openalex.org/I4210128969"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Junwen Bai","raw_affiliation_strings":["Google,USA","Google, USA"],"affiliations":[{"raw_affiliation_string":"Google,USA","institution_ids":["https://openalex.org/I1291425158"]},{"raw_affiliation_string":"Google, USA","institution_ids":["https://openalex.org/I1291425158"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":10,"corresponding_author_ids":["https://openalex.org/A5100677409"],"corresponding_institution_ids":["https://openalex.org/I1291425158"],"apc_list":null,"apc_paid":null,"fwci":5.4033,"has_fulltext":false,"cited_by_count":50,"citation_normalized_percentile":{"value":0.96868574,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":93,"max":100},"biblio":{"volume":null,"issue":null,"first_page":"1011","last_page":"1018"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9995999932289124,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9995999932289124,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9994999766349792,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9994999766349792,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7806432247161865},{"id":"https://openalex.org/keywords/bottleneck","display_name":"Bottleneck","score":0.7035955786705017},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.6517937183380127},{"id":"https://openalex.org/keywords/scaling","display_name":"Scaling","score":0.6320996284484863},{"id":"https://openalex.org/keywords/encoder","display_name":"Encoder","score":0.578967809677124},{"id":"https://openalex.org/keywords/scale","display_name":"Scale (ratio)","score":0.5160179138183594},{"id":"https://openalex.org/keywords/language-model","display_name":"Language model","score":0.48611950874328613},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.40391191840171814},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.401346355676651},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.11022165417671204}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7806432247161865},{"id":"https://openalex.org/C2780513914","wikidata":"https://www.wikidata.org/wiki/Q18210350","display_name":"Bottleneck","level":2,"score":0.7035955786705017},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.6517937183380127},{"id":"https://openalex.org/C99844830","wikidata":"https://www.wikidata.org/wiki/Q102441924","display_name":"Scaling","level":2,"score":0.6320996284484863},{"id":"https://openalex.org/C118505674","wikidata":"https://www.wikidata.org/wiki/Q42586063","display_name":"Encoder","level":2,"score":0.578967809677124},{"id":"https://openalex.org/C2778755073","wikidata":"https://www.wikidata.org/wiki/Q10858537","display_name":"Scale (ratio)","level":2,"score":0.5160179138183594},{"id":"https://openalex.org/C137293760","wikidata":"https://www.wikidata.org/wiki/Q3621696","display_name":"Language model","level":2,"score":0.48611950874328613},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.40391191840171814},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.401346355676651},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.11022165417671204},{"id":"https://openalex.org/C2524010","wikidata":"https://www.wikidata.org/wiki/Q8087","display_name":"Geometry","level":1,"score":0.0},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C187736073","wikidata":"https://www.wikidata.org/wiki/Q2920921","display_name":"Management","level":1,"score":0.0},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C149635348","wikidata":"https://www.wikidata.org/wiki/Q193040","display_name":"Embedded system","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/asru51503.2021.9687871","is_oa":false,"landing_page_url":"https://doi.org/10.1109/asru51503.2021.9687871","pdf_url":null,"source":{"id":"https://openalex.org/S4363606113","display_name":"2021 IEEE Automatic Speech Recognition and Understanding Workshop (ASRU)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2021 IEEE Automatic Speech Recognition and Understanding Workshop (ASRU)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"display_name":"Quality Education","id":"https://metadata.un.org/sdg/4","score":0.4399999976158142}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":57,"referenced_works":["https://openalex.org/W36434594","https://openalex.org/W1522301498","https://openalex.org/W1549321558","https://openalex.org/W1828163288","https://openalex.org/W1855892484","https://openalex.org/W2160815625","https://openalex.org/W2271840356","https://openalex.org/W2293634267","https://openalex.org/W2526425061","https://openalex.org/W2585945212","https://openalex.org/W2808640845","https://openalex.org/W2896457183","https://openalex.org/W2913340405","https://openalex.org/W2928941594","https://openalex.org/W2936774411","https://openalex.org/W2958953787","https://openalex.org/W2962760690","https://openalex.org/W2962893195","https://openalex.org/W2963431393","https://openalex.org/W2964002616","https://openalex.org/W2971840980","https://openalex.org/W3001279689","https://openalex.org/W3016010032","https://openalex.org/W3035019713","https://openalex.org/W3040573126","https://openalex.org/W3092189037","https://openalex.org/W3093579165","https://openalex.org/W3095173472","https://openalex.org/W3095311338","https://openalex.org/W3095410713","https://openalex.org/W3096032230","https://openalex.org/W3096215352","https://openalex.org/W3097777922","https://openalex.org/W3128096387","https://openalex.org/W3160766462","https://openalex.org/W3163300396","https://openalex.org/W3198429080","https://openalex.org/W4292779060","https://openalex.org/W4293569541","https://openalex.org/W4385245566","https://openalex.org/W6631190155","https://openalex.org/W6632924066","https://openalex.org/W6638749077","https://openalex.org/W6639156005","https://openalex.org/W6696934422","https://openalex.org/W6739901393","https://openalex.org/W6751104502","https://openalex.org/W6752630080","https://openalex.org/W6755207826","https://openalex.org/W6760633627","https://openalex.org/W6765469073","https://openalex.org/W6772383348","https://openalex.org/W6778883912","https://openalex.org/W6779919476","https://openalex.org/W6780805062","https://openalex.org/W6783990525","https://openalex.org/W6784614252"],"related_works":["https://openalex.org/W1657880117","https://openalex.org/W2595172197","https://openalex.org/W2127970246","https://openalex.org/W2084856301","https://openalex.org/W1001352512","https://openalex.org/W4382618745","https://openalex.org/W2885125400","https://openalex.org/W1989889224","https://openalex.org/W1987128138","https://openalex.org/W3098003361"],"abstract_inverted_index":{"Building":[0],"ASR":[1],"models":[2,129],"across":[3],"many":[4],"languages":[5,38,197],"is":[6,99],"a":[7,57,61,169],"challenging":[8],"multi-task":[9],"learning":[10],"problem":[11],"due":[12,42],"to":[13,29,43,74,81,85,103,119,195],"large":[14,180,185],"variations":[15],"and":[16,50,116,121,179,198],"heavily":[17],"unbalanced":[18],"data.":[19],"Existing":[20],"work":[21],"has":[22],"shown":[23],"positive":[24],"transfer":[25],"from":[26,45,72],"high":[27,36],"resource":[28,31,37],"low":[30],"languages.":[32],"However,":[33],"degradations":[34],"on":[35,60],"are":[39,130],"commonly":[40],"observed":[41],"interference":[44],"the":[46,65,94,105,151,155,164],"heterogeneous":[47],"multilingual":[48],"data":[49,68,134],"reduction":[51],"in":[52,140,147],"per-language":[53],"capacity.":[54],"We":[55,77],"conduct":[56],"capacity":[58,106,171],"study":[59],"15-language":[62],"task,":[63],"with":[64,188],"amount":[66],"of":[67,96,142,160],"per":[69],"language":[70],"varying":[71],"7.6K":[73],"53.5K":[75],"hours.":[76],"adopt":[78],"GShard":[79],"[1]":[80],"efficiently":[82],"scale":[83],"up":[84],"10B":[86,122],"parameters.":[87],"Empirically,":[88],"we":[89],"find":[90],"that":[91],"(1)":[92],"scaling":[93,117],"number":[95],"model":[97,111,153],"parameters":[98],"an":[100],"effective":[101],"way":[102],"solve":[104],"bottleneck":[107],"-":[108,150],"our":[109],"500M-param":[110,165],"already":[112],"outperforms":[113],"monolingual":[114],"baselines":[115],"it":[118],"1B":[120],"brought":[123],"further":[124],"quality":[125],"gains;":[126],"(2)":[127],"larger":[128],"not":[131],"only":[132],"more":[133,138],"efficient,":[135],"but":[136],"also":[137],"efficient":[139],"terms":[141],"training":[143,161],"cost":[144],"as":[145,163],"measured":[146],"TPU":[148],"days":[149],"1B-param":[152],"reaches":[154],"same":[156],"accuracy":[157],"at":[158],"34%":[159],"time":[162],"model;":[166],"(3)":[167],"given":[168],"fixed":[170],"budget,":[172],"adding":[173],"depth":[174],"works":[175],"better":[176,183],"than":[177,184],"width":[178],"encoders":[181],"do":[182],"decoders;":[186],"(4)":[187],"continuous":[189],"training,":[190],"they":[191],"can":[192],"be":[193],"adapted":[194],"new":[196],"domains.":[199]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":6},{"year":2024,"cited_by_count":7},{"year":2023,"cited_by_count":22},{"year":2022,"cited_by_count":12},{"year":2021,"cited_by_count":2}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
