{"id":"https://openalex.org/W3160766462","doi":"https://doi.org/10.1109/icassp39728.2021.9413899","title":"A Better and Faster end-to-end Model for Streaming ASR","display_name":"A Better and Faster end-to-end Model for Streaming ASR","publication_year":2021,"publication_date":"2021-05-13","ids":{"openalex":"https://openalex.org/W3160766462","doi":"https://doi.org/10.1109/icassp39728.2021.9413899","mag":"3160766462"},"language":"en","primary_location":{"id":"doi:10.1109/icassp39728.2021.9413899","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp39728.2021.9413899","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2021 - 2021 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5100374448","display_name":"Bo Li","orcid":"https://orcid.org/0000-0002-6711-3603"},"institutions":[{"id":"https://openalex.org/I1291425158","display_name":"Google (United States)","ror":"https://ror.org/00njsd438","country_code":"US","type":"company","lineage":["https://openalex.org/I1291425158","https://openalex.org/I4210128969"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Bo Li","raw_affiliation_strings":["Google LLC, USA"],"affiliations":[{"raw_affiliation_string":"Google LLC, USA","institution_ids":["https://openalex.org/I1291425158"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5070108948","display_name":"Anmol Gulati","orcid":"https://orcid.org/0009-0007-2077-9583"},"institutions":[{"id":"https://openalex.org/I1291425158","display_name":"Google (United States)","ror":"https://ror.org/00njsd438","country_code":"US","type":"company","lineage":["https://openalex.org/I1291425158","https://openalex.org/I4210128969"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Anmol Gulati","raw_affiliation_strings":["Google LLC, USA"],"affiliations":[{"raw_affiliation_string":"Google LLC, USA","institution_ids":["https://openalex.org/I1291425158"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5001205723","display_name":"Jiahui Yu","orcid":"https://orcid.org/0000-0003-1314-2481"},"institutions":[{"id":"https://openalex.org/I1291425158","display_name":"Google (United States)","ror":"https://ror.org/00njsd438","country_code":"US","type":"company","lineage":["https://openalex.org/I1291425158","https://openalex.org/I4210128969"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Jiahui Yu","raw_affiliation_strings":["Google LLC, USA"],"affiliations":[{"raw_affiliation_string":"Google LLC, USA","institution_ids":["https://openalex.org/I1291425158"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5070513394","display_name":"Tara N. Sainath","orcid":"https://orcid.org/0000-0002-4126-6556"},"institutions":[{"id":"https://openalex.org/I1291425158","display_name":"Google (United States)","ror":"https://ror.org/00njsd438","country_code":"US","type":"company","lineage":["https://openalex.org/I1291425158","https://openalex.org/I4210128969"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Tara N. Sainath","raw_affiliation_strings":["Google LLC, USA"],"affiliations":[{"raw_affiliation_string":"Google LLC, USA","institution_ids":["https://openalex.org/I1291425158"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5027763497","display_name":"Chung\u2010Cheng Chiu","orcid":"https://orcid.org/0000-0001-9729-4778"},"institutions":[{"id":"https://openalex.org/I1291425158","display_name":"Google (United States)","ror":"https://ror.org/00njsd438","country_code":"US","type":"company","lineage":["https://openalex.org/I1291425158","https://openalex.org/I4210128969"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Chung-Cheng Chiu","raw_affiliation_strings":["Google LLC, USA"],"affiliations":[{"raw_affiliation_string":"Google LLC, USA","institution_ids":["https://openalex.org/I1291425158"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5000078382","display_name":"Arun Narayanan","orcid":"https://orcid.org/0009-0008-3325-8928"},"institutions":[{"id":"https://openalex.org/I1291425158","display_name":"Google (United States)","ror":"https://ror.org/00njsd438","country_code":"US","type":"company","lineage":["https://openalex.org/I1291425158","https://openalex.org/I4210128969"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Arun Narayanan","raw_affiliation_strings":["Google LLC, USA"],"affiliations":[{"raw_affiliation_string":"Google LLC, USA","institution_ids":["https://openalex.org/I1291425158"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5001306222","display_name":"Shuo-Yiin Chang","orcid":null},"institutions":[{"id":"https://openalex.org/I1291425158","display_name":"Google (United States)","ror":"https://ror.org/00njsd438","country_code":"US","type":"company","lineage":["https://openalex.org/I1291425158","https://openalex.org/I4210128969"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Shuo-Yiin Chang","raw_affiliation_strings":["Google LLC, USA"],"affiliations":[{"raw_affiliation_string":"Google LLC, USA","institution_ids":["https://openalex.org/I1291425158"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5112901893","display_name":"Ruoming Pang","orcid":null},"institutions":[{"id":"https://openalex.org/I1291425158","display_name":"Google (United States)","ror":"https://ror.org/00njsd438","country_code":"US","type":"company","lineage":["https://openalex.org/I1291425158","https://openalex.org/I4210128969"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Ruoming Pang","raw_affiliation_strings":["Google LLC, USA"],"affiliations":[{"raw_affiliation_string":"Google LLC, USA","institution_ids":["https://openalex.org/I1291425158"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101319167","display_name":"Yanzhang He","orcid":null},"institutions":[{"id":"https://openalex.org/I1291425158","display_name":"Google (United States)","ror":"https://ror.org/00njsd438","country_code":"US","type":"company","lineage":["https://openalex.org/I1291425158","https://openalex.org/I4210128969"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Yanzhang He","raw_affiliation_strings":["Google LLC, USA"],"affiliations":[{"raw_affiliation_string":"Google LLC, USA","institution_ids":["https://openalex.org/I1291425158"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5048771433","display_name":"James Qin","orcid":null},"institutions":[{"id":"https://openalex.org/I1291425158","display_name":"Google (United States)","ror":"https://ror.org/00njsd438","country_code":"US","type":"company","lineage":["https://openalex.org/I1291425158","https://openalex.org/I4210128969"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"James Qin","raw_affiliation_strings":["Google LLC, USA"],"affiliations":[{"raw_affiliation_string":"Google LLC, USA","institution_ids":["https://openalex.org/I1291425158"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100750907","display_name":"Wei Han","orcid":"https://orcid.org/0000-0002-4201-9645"},"institutions":[{"id":"https://openalex.org/I1291425158","display_name":"Google (United States)","ror":"https://ror.org/00njsd438","country_code":"US","type":"company","lineage":["https://openalex.org/I1291425158","https://openalex.org/I4210128969"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Wei Han","raw_affiliation_strings":["Google LLC, USA"],"affiliations":[{"raw_affiliation_string":"Google LLC, USA","institution_ids":["https://openalex.org/I1291425158"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5026297587","display_name":"Qiao Liang","orcid":"https://orcid.org/0000-0003-4464-4644"},"institutions":[{"id":"https://openalex.org/I1291425158","display_name":"Google (United States)","ror":"https://ror.org/00njsd438","country_code":"US","type":"company","lineage":["https://openalex.org/I1291425158","https://openalex.org/I4210128969"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Qiao Liang","raw_affiliation_strings":["Google LLC, USA"],"affiliations":[{"raw_affiliation_string":"Google LLC, USA","institution_ids":["https://openalex.org/I1291425158"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100433648","display_name":"Yu Zhang","orcid":"https://orcid.org/0000-0002-9505-1833"},"institutions":[{"id":"https://openalex.org/I1291425158","display_name":"Google (United States)","ror":"https://ror.org/00njsd438","country_code":"US","type":"company","lineage":["https://openalex.org/I1291425158","https://openalex.org/I4210128969"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Yu Zhang","raw_affiliation_strings":["Google LLC, USA"],"affiliations":[{"raw_affiliation_string":"Google LLC, USA","institution_ids":["https://openalex.org/I1291425158"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5032931723","display_name":"Trevor Strohman","orcid":null},"institutions":[{"id":"https://openalex.org/I1291425158","display_name":"Google (United States)","ror":"https://ror.org/00njsd438","country_code":"US","type":"company","lineage":["https://openalex.org/I1291425158","https://openalex.org/I4210128969"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Trevor Strohman","raw_affiliation_strings":["Google LLC, USA"],"affiliations":[{"raw_affiliation_string":"Google LLC, USA","institution_ids":["https://openalex.org/I1291425158"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5010253402","display_name":"Yonghui Wu","orcid":"https://orcid.org/0000-0002-6780-6135"},"institutions":[{"id":"https://openalex.org/I1291425158","display_name":"Google (United States)","ror":"https://ror.org/00njsd438","country_code":"US","type":"company","lineage":["https://openalex.org/I1291425158","https://openalex.org/I4210128969"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Yonghui Wu","raw_affiliation_strings":["Google LLC, USA"],"affiliations":[{"raw_affiliation_string":"Google LLC, USA","institution_ids":["https://openalex.org/I1291425158"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":15,"corresponding_author_ids":["https://openalex.org/A5100374448"],"corresponding_institution_ids":["https://openalex.org/I1291425158"],"apc_list":null,"apc_paid":null,"fwci":11.9689,"has_fulltext":false,"cited_by_count":92,"citation_normalized_percentile":{"value":0.98839626,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":89,"max":100},"biblio":{"volume":null,"issue":null,"first_page":"5634","last_page":"5638"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9994999766349792,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9993000030517578,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8077638149261475},{"id":"https://openalex.org/keywords/encoder","display_name":"Encoder","score":0.7880938649177551},{"id":"https://openalex.org/keywords/latency","display_name":"Latency (audio)","score":0.726778507232666},{"id":"https://openalex.org/keywords/end-to-end-principle","display_name":"End-to-end principle","score":0.712870717048645},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.49075397849082947},{"id":"https://openalex.org/keywords/degradation","display_name":"Degradation (telecommunications)","score":0.4156181216239929},{"id":"https://openalex.org/keywords/word-error-rate","display_name":"Word error rate","score":0.4128198027610779},{"id":"https://openalex.org/keywords/language-model","display_name":"Language model","score":0.4110730290412903},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.39522626996040344},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.326140433549881},{"id":"https://openalex.org/keywords/telecommunications","display_name":"Telecommunications","score":0.09939467906951904}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8077638149261475},{"id":"https://openalex.org/C118505674","wikidata":"https://www.wikidata.org/wiki/Q42586063","display_name":"Encoder","level":2,"score":0.7880938649177551},{"id":"https://openalex.org/C82876162","wikidata":"https://www.wikidata.org/wiki/Q17096504","display_name":"Latency (audio)","level":2,"score":0.726778507232666},{"id":"https://openalex.org/C74296488","wikidata":"https://www.wikidata.org/wiki/Q2527392","display_name":"End-to-end principle","level":2,"score":0.712870717048645},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.49075397849082947},{"id":"https://openalex.org/C2779679103","wikidata":"https://www.wikidata.org/wiki/Q5251805","display_name":"Degradation (telecommunications)","level":2,"score":0.4156181216239929},{"id":"https://openalex.org/C40969351","wikidata":"https://www.wikidata.org/wiki/Q3516228","display_name":"Word error rate","level":2,"score":0.4128198027610779},{"id":"https://openalex.org/C137293760","wikidata":"https://www.wikidata.org/wiki/Q3621696","display_name":"Language model","level":2,"score":0.4110730290412903},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.39522626996040344},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.326140433549881},{"id":"https://openalex.org/C76155785","wikidata":"https://www.wikidata.org/wiki/Q418","display_name":"Telecommunications","level":1,"score":0.09939467906951904},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/icassp39728.2021.9413899","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp39728.2021.9413899","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2021 - 2021 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":53,"referenced_works":["https://openalex.org/W1522301498","https://openalex.org/W1828163288","https://openalex.org/W1855892484","https://openalex.org/W2033256038","https://openalex.org/W2073965851","https://openalex.org/W2076214367","https://openalex.org/W2121879602","https://openalex.org/W2127141656","https://openalex.org/W2178654303","https://openalex.org/W2271840356","https://openalex.org/W2515439472","https://openalex.org/W2526425061","https://openalex.org/W2605141709","https://openalex.org/W2617258110","https://openalex.org/W2747467128","https://openalex.org/W2928941594","https://openalex.org/W2936123380","https://openalex.org/W2962760690","https://openalex.org/W2962824709","https://openalex.org/W2964110616","https://openalex.org/W2964121744","https://openalex.org/W2972439411","https://openalex.org/W2972625221","https://openalex.org/W2973122799","https://openalex.org/W2981857663","https://openalex.org/W3008181812","https://openalex.org/W3008898571","https://openalex.org/W3015194534","https://openalex.org/W3015654466","https://openalex.org/W3015927303","https://openalex.org/W3021515889","https://openalex.org/W3025165719","https://openalex.org/W3037029661","https://openalex.org/W3092970820","https://openalex.org/W3095311338","https://openalex.org/W3096524176","https://openalex.org/W3096702180","https://openalex.org/W3097777922","https://openalex.org/W3163203022","https://openalex.org/W3163907627","https://openalex.org/W6631190155","https://openalex.org/W6638749077","https://openalex.org/W6639156005","https://openalex.org/W6685711979","https://openalex.org/W6694517276","https://openalex.org/W6735706088","https://openalex.org/W6749954789","https://openalex.org/W6756495167","https://openalex.org/W6760633627","https://openalex.org/W6769557084","https://openalex.org/W6779924452","https://openalex.org/W6784192591","https://openalex.org/W6784572113"],"related_works":["https://openalex.org/W2916997151","https://openalex.org/W2949174760","https://openalex.org/W1566315437","https://openalex.org/W4221142855","https://openalex.org/W2594897229","https://openalex.org/W2151348424","https://openalex.org/W2050138804","https://openalex.org/W767271433","https://openalex.org/W4290708361","https://openalex.org/W2129812225"],"abstract_inverted_index":{"End-to-end":[0],"(E2E)":[1],"models":[2,9],"have":[3],"shown":[4,108],"to":[5,36,51,67,122,127],"outperform":[6],"state-of-the-art":[7],"conventional":[8,53],"for":[10,111,166],"streaming":[11,167],"speech":[12],"recognition":[13],"[1]":[14],"across":[15],"many":[16],"dimensions,":[17],"including":[18],"quality":[19,84,162],"(as":[20],"measured":[21],"by":[22],"word":[23],"error":[24],"rate":[25],"(WER))":[26],"and":[27,43,163],"endpointer":[28],"latency":[29,49,80,164],"[2].":[30],"However,":[31],"the":[32,38,41,64,92,96,129,141,153],"model":[33,66,101],"still":[34],"tends":[35],"delay":[37],"predictions":[39],"towards":[40],"end":[42],"thus":[44],"has":[45,107],"much":[46],"higher":[47],"partial":[48],"compared":[50],"a":[52,83,118,160],"ASR":[54],"model.":[55],"To":[56,86],"address":[57,87],"this":[58],"issue,":[59],"we":[60,89,114,133],"look":[61],"at":[62],"encouraging":[63],"E2E":[65,100],"emit":[68],"words":[69],"early,":[70],"through":[71],"an":[72,146],"algorithm":[73,147],"called":[74,148],"FastEmit":[75],"[3].":[76],"Naturally,":[77],"improving":[78],"on":[79],"results":[81],"in":[82,95],"degradation.":[85],"this,":[88],"explore":[90,116,134],"replacing":[91],"LSTM":[93],"layers":[94,104,137],"encoder":[97],"of":[98],"our":[99],"with":[102,156],"Conformer":[103,136,154],"[4],":[105],"which":[106],"good":[109],"improvements":[110],"ASR.":[112,168],"Secondly,":[113],"also":[115],"running":[117],"2nd-pass":[119,130],"beam":[120],"search":[121],"improve":[123],"quality.":[124],"In":[125],"order":[126],"ensure":[128],"completes":[131],"quickly,":[132],"non-causal":[135],"that":[138],"feed":[139],"into":[140],"same":[142],"1st-pass":[143],"RNN-T":[144,155],"decoder,":[145],"Cascaded":[149,157],"Encoders":[150,158],"[5].":[151],"Overall,":[152],"offers":[159],"better":[161],"tradeoff":[165]},"counts_by_year":[{"year":2025,"cited_by_count":6},{"year":2024,"cited_by_count":15},{"year":2023,"cited_by_count":28},{"year":2022,"cited_by_count":30},{"year":2021,"cited_by_count":12},{"year":2020,"cited_by_count":1}],"updated_date":"2026-04-03T22:45:19.894376","created_date":"2025-10-10T00:00:00"}
