{"id":"https://openalex.org/W4392904087","doi":"https://doi.org/10.1109/icassp48485.2024.10446985","title":"Extreme Encoder Output Frame Rate Reduction: Improving Computational Latencies of Large End-to-End Models","display_name":"Extreme Encoder Output Frame Rate Reduction: Improving Computational Latencies of Large End-to-End Models","publication_year":2024,"publication_date":"2024-03-18","ids":{"openalex":"https://openalex.org/W4392904087","doi":"https://doi.org/10.1109/icassp48485.2024.10446985"},"language":"en","primary_location":{"id":"doi:10.1109/icassp48485.2024.10446985","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp48485.2024.10446985","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2024 - 2024 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5032640894","display_name":"Rohit Prabhavalkar","orcid":"https://orcid.org/0000-0001-5331-6058"},"institutions":[{"id":"https://openalex.org/I1291425158","display_name":"Google (United States)","ror":"https://ror.org/00njsd438","country_code":"US","type":"company","lineage":["https://openalex.org/I1291425158","https://openalex.org/I4210128969"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Rohit Prabhavalkar","raw_affiliation_strings":["Google LLC,USA","Google LLC, USA"],"affiliations":[{"raw_affiliation_string":"Google LLC,USA","institution_ids":["https://openalex.org/I1291425158"]},{"raw_affiliation_string":"Google LLC, USA","institution_ids":["https://openalex.org/I1291425158"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101749753","display_name":"Zhong Meng","orcid":"https://orcid.org/0000-0001-7814-5929"},"institutions":[{"id":"https://openalex.org/I1291425158","display_name":"Google (United States)","ror":"https://ror.org/00njsd438","country_code":"US","type":"company","lineage":["https://openalex.org/I1291425158","https://openalex.org/I4210128969"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Zhong Meng","raw_affiliation_strings":["Google LLC,USA","Google LLC, USA"],"affiliations":[{"raw_affiliation_string":"Google LLC,USA","institution_ids":["https://openalex.org/I1291425158"]},{"raw_affiliation_string":"Google LLC, USA","institution_ids":["https://openalex.org/I1291425158"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101432591","display_name":"Weiran Wang","orcid":"https://orcid.org/0009-0000-0843-707X"},"institutions":[{"id":"https://openalex.org/I1291425158","display_name":"Google (United States)","ror":"https://ror.org/00njsd438","country_code":"US","type":"company","lineage":["https://openalex.org/I1291425158","https://openalex.org/I4210128969"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Weiran Wang","raw_affiliation_strings":["Google LLC,USA","Google LLC, USA"],"affiliations":[{"raw_affiliation_string":"Google LLC,USA","institution_ids":["https://openalex.org/I1291425158"]},{"raw_affiliation_string":"Google LLC, USA","institution_ids":["https://openalex.org/I1291425158"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5008253072","display_name":"Adam Stooke","orcid":null},"institutions":[{"id":"https://openalex.org/I1291425158","display_name":"Google (United States)","ror":"https://ror.org/00njsd438","country_code":"US","type":"company","lineage":["https://openalex.org/I1291425158","https://openalex.org/I4210128969"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Adam Stooke","raw_affiliation_strings":["Google LLC,USA","Google LLC, USA"],"affiliations":[{"raw_affiliation_string":"Google LLC,USA","institution_ids":["https://openalex.org/I1291425158"]},{"raw_affiliation_string":"Google LLC, USA","institution_ids":["https://openalex.org/I1291425158"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5013075695","display_name":"Xingyu Cai","orcid":"https://orcid.org/0000-0003-1537-7161"},"institutions":[{"id":"https://openalex.org/I1291425158","display_name":"Google (United States)","ror":"https://ror.org/00njsd438","country_code":"US","type":"company","lineage":["https://openalex.org/I1291425158","https://openalex.org/I4210128969"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Xingyu Cai","raw_affiliation_strings":["Google LLC,USA","Google LLC, USA"],"affiliations":[{"raw_affiliation_string":"Google LLC,USA","institution_ids":["https://openalex.org/I1291425158"]},{"raw_affiliation_string":"Google LLC, USA","institution_ids":["https://openalex.org/I1291425158"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101319167","display_name":"Yanzhang He","orcid":null},"institutions":[{"id":"https://openalex.org/I1291425158","display_name":"Google (United States)","ror":"https://ror.org/00njsd438","country_code":"US","type":"company","lineage":["https://openalex.org/I1291425158","https://openalex.org/I4210128969"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Yanzhang He","raw_affiliation_strings":["Google LLC,USA","Google LLC, USA"],"affiliations":[{"raw_affiliation_string":"Google LLC,USA","institution_ids":["https://openalex.org/I1291425158"]},{"raw_affiliation_string":"Google LLC, USA","institution_ids":["https://openalex.org/I1291425158"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5000078382","display_name":"Arun Narayanan","orcid":"https://orcid.org/0009-0008-3325-8928"},"institutions":[{"id":"https://openalex.org/I1291425158","display_name":"Google (United States)","ror":"https://ror.org/00njsd438","country_code":"US","type":"company","lineage":["https://openalex.org/I1291425158","https://openalex.org/I4210128969"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Arun Narayanan","raw_affiliation_strings":["Google LLC,USA","Google LLC, USA"],"affiliations":[{"raw_affiliation_string":"Google LLC,USA","institution_ids":["https://openalex.org/I1291425158"]},{"raw_affiliation_string":"Google LLC, USA","institution_ids":["https://openalex.org/I1291425158"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5014088530","display_name":"Dongseong Hwang","orcid":null},"institutions":[{"id":"https://openalex.org/I1291425158","display_name":"Google (United States)","ror":"https://ror.org/00njsd438","country_code":"US","type":"company","lineage":["https://openalex.org/I1291425158","https://openalex.org/I4210128969"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Dongseong Hwang","raw_affiliation_strings":["Google LLC,USA","Google LLC, USA"],"affiliations":[{"raw_affiliation_string":"Google LLC,USA","institution_ids":["https://openalex.org/I1291425158"]},{"raw_affiliation_string":"Google LLC, USA","institution_ids":["https://openalex.org/I1291425158"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5070513394","display_name":"Tara N. Sainath","orcid":"https://orcid.org/0000-0002-4126-6556"},"institutions":[{"id":"https://openalex.org/I1291425158","display_name":"Google (United States)","ror":"https://ror.org/00njsd438","country_code":"US","type":"company","lineage":["https://openalex.org/I1291425158","https://openalex.org/I4210128969"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Tara N. Sainath","raw_affiliation_strings":["Google LLC,USA","Google LLC, USA"],"affiliations":[{"raw_affiliation_string":"Google LLC,USA","institution_ids":["https://openalex.org/I1291425158"]},{"raw_affiliation_string":"Google LLC, USA","institution_ids":["https://openalex.org/I1291425158"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5103874391","display_name":"Pedro J. Moreno","orcid":null},"institutions":[{"id":"https://openalex.org/I1291425158","display_name":"Google (United States)","ror":"https://ror.org/00njsd438","country_code":"US","type":"company","lineage":["https://openalex.org/I1291425158","https://openalex.org/I4210128969"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Pedro J. Moreno","raw_affiliation_strings":["Google LLC,USA","Google LLC, USA"],"affiliations":[{"raw_affiliation_string":"Google LLC,USA","institution_ids":["https://openalex.org/I1291425158"]},{"raw_affiliation_string":"Google LLC, USA","institution_ids":["https://openalex.org/I1291425158"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":10,"corresponding_author_ids":["https://openalex.org/A5032640894"],"corresponding_institution_ids":["https://openalex.org/I1291425158"],"apc_list":null,"apc_paid":null,"fwci":1.8185,"has_fulltext":false,"cited_by_count":5,"citation_normalized_percentile":{"value":0.86357482,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":94,"max":97},"biblio":{"volume":null,"issue":null,"first_page":"11816","last_page":"11820"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9995999932289124,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9980999827384949,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/encoder","display_name":"Encoder","score":0.8800927400588989},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.797643780708313},{"id":"https://openalex.org/keywords/reduction","display_name":"Reduction (mathematics)","score":0.6813469529151917},{"id":"https://openalex.org/keywords/decoding-methods","display_name":"Decoding methods","score":0.6464623808860779},{"id":"https://openalex.org/keywords/frame","display_name":"Frame (networking)","score":0.6317336559295654},{"id":"https://openalex.org/keywords/end-to-end-principle","display_name":"End-to-end principle","score":0.5770799517631531},{"id":"https://openalex.org/keywords/encoding","display_name":"Encoding (memory)","score":0.5303637981414795},{"id":"https://openalex.org/keywords/word-error-rate","display_name":"Word error rate","score":0.45895060896873474},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.42602798342704773},{"id":"https://openalex.org/keywords/real-time-computing","display_name":"Real-time computing","score":0.3616692125797272},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.33350494503974915},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.3227168917655945},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.2262207567691803},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.08567988872528076},{"id":"https://openalex.org/keywords/engineering","display_name":"Engineering","score":0.07840326428413391},{"id":"https://openalex.org/keywords/telecommunications","display_name":"Telecommunications","score":0.06729978322982788}],"concepts":[{"id":"https://openalex.org/C118505674","wikidata":"https://www.wikidata.org/wiki/Q42586063","display_name":"Encoder","level":2,"score":0.8800927400588989},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.797643780708313},{"id":"https://openalex.org/C111335779","wikidata":"https://www.wikidata.org/wiki/Q3454686","display_name":"Reduction (mathematics)","level":2,"score":0.6813469529151917},{"id":"https://openalex.org/C57273362","wikidata":"https://www.wikidata.org/wiki/Q576722","display_name":"Decoding methods","level":2,"score":0.6464623808860779},{"id":"https://openalex.org/C126042441","wikidata":"https://www.wikidata.org/wiki/Q1324888","display_name":"Frame (networking)","level":2,"score":0.6317336559295654},{"id":"https://openalex.org/C74296488","wikidata":"https://www.wikidata.org/wiki/Q2527392","display_name":"End-to-end principle","level":2,"score":0.5770799517631531},{"id":"https://openalex.org/C125411270","wikidata":"https://www.wikidata.org/wiki/Q18653","display_name":"Encoding (memory)","level":2,"score":0.5303637981414795},{"id":"https://openalex.org/C40969351","wikidata":"https://www.wikidata.org/wiki/Q3516228","display_name":"Word error rate","level":2,"score":0.45895060896873474},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.42602798342704773},{"id":"https://openalex.org/C79403827","wikidata":"https://www.wikidata.org/wiki/Q3988","display_name":"Real-time computing","level":1,"score":0.3616692125797272},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.33350494503974915},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.3227168917655945},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.2262207567691803},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.08567988872528076},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.07840326428413391},{"id":"https://openalex.org/C76155785","wikidata":"https://www.wikidata.org/wiki/Q418","display_name":"Telecommunications","level":1,"score":0.06729978322982788},{"id":"https://openalex.org/C2524010","wikidata":"https://www.wikidata.org/wiki/Q8087","display_name":"Geometry","level":1,"score":0.0},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0},{"id":"https://openalex.org/C201995342","wikidata":"https://www.wikidata.org/wiki/Q682496","display_name":"Systems engineering","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/icassp48485.2024.10446985","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp48485.2024.10446985","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2024 - 2024 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":41,"referenced_works":["https://openalex.org/W2013598660","https://openalex.org/W2064675550","https://openalex.org/W2121879602","https://openalex.org/W2127141656","https://openalex.org/W2143612262","https://openalex.org/W2327501763","https://openalex.org/W2513186698","https://openalex.org/W2617258110","https://openalex.org/W2928941594","https://openalex.org/W2936774411","https://openalex.org/W2962760690","https://openalex.org/W2963747784","https://openalex.org/W3015190365","https://openalex.org/W3015686596","https://openalex.org/W3016010032","https://openalex.org/W3016234571","https://openalex.org/W3096032230","https://openalex.org/W3096518646","https://openalex.org/W3096815019","https://openalex.org/W3097777922","https://openalex.org/W3160766462","https://openalex.org/W3161375121","https://openalex.org/W3190062760","https://openalex.org/W3196784225","https://openalex.org/W3197976839","https://openalex.org/W3211278025","https://openalex.org/W4210463634","https://openalex.org/W4223988178","https://openalex.org/W4225529283","https://openalex.org/W4323066695","https://openalex.org/W4385823026","https://openalex.org/W4387031702","https://openalex.org/W4388017359","https://openalex.org/W4391021572","https://openalex.org/W6751104502","https://openalex.org/W6760633627","https://openalex.org/W6779089016","https://openalex.org/W6780218876","https://openalex.org/W6847363464","https://openalex.org/W6850218400","https://openalex.org/W6857414055"],"related_works":["https://openalex.org/W2368824897","https://openalex.org/W1508050556","https://openalex.org/W1910862367","https://openalex.org/W2379365082","https://openalex.org/W2370747590","https://openalex.org/W2030109976","https://openalex.org/W2369260257","https://openalex.org/W2129146436","https://openalex.org/W2389120450","https://openalex.org/W55249799"],"abstract_inverted_index":{"The":[0],"accuracy":[1],"of":[2,25,31,66,91,102,129],"end-to-end":[3],"(E2E)":[4],"automatic":[5],"speech":[6],"recognition":[7],"(ASR)":[8],"models":[9],"continues":[10],"to":[11,17,58,109,156],"improve":[12],"as":[13],"they":[14],"are":[15],"scaled":[16],"larger":[18],"sizes,":[19],"with":[20],"some":[21],"now":[22],"reaching":[23],"billions":[24],"parameters.":[26],"Widespread":[27],"deployment":[28],"and":[29,147,152],"adoption":[30],"these":[32],"models,":[33],"however,":[34],"requires":[35],"computationally":[36,160],"efficient":[37],"strategies":[38],"for":[39,125],"decoding.":[40],"In":[41],"the":[42,56,89,100,107,111],"present":[43],"work,":[44,77],"we":[45,78,98,118],"study":[46,99],"one":[47,121],"such":[48],"strategy:":[49],"applying":[50],"multiple":[51,92],"frame":[52,124],"reduction":[53,82,94],"layers":[54],"in":[55,75,106],"encoder":[57,60,108,122,146],"compress":[59],"outputs":[61],"into":[62],"a":[63,139,157],"small":[64],"number":[65],"output":[67,123],"frames.":[68],"While":[69],"similar":[70],"techniques":[71],"have":[72],"been":[73,86],"investigated":[74],"previous":[76],"achieve":[79],"dramatically":[80],"more":[81],"than":[83],"has":[84],"previously":[85],"demonstrated":[87],"through":[88],"use":[90],"funnel":[93],"layers.":[95],"Through":[96],"ablations,":[97],"impact":[101],"various":[103],"architectural":[104],"choices":[105],"identify":[110],"most":[112],"effective":[113],"strategies.":[114],"We":[115],"demonstrate":[116],"that":[117],"can":[119],"generate":[120],"every":[126],"2.56":[127],"sec":[128],"input":[130],"speech,":[131],"without":[132],"significantly":[133],"affecting":[134],"word":[135],"error":[136],"rate":[137],"on":[138],"large-scale":[140],"voice":[141],"search":[142],"task,":[143],"while":[144],"improving":[145],"decoder":[148],"latencies":[149],"by":[150],"48%":[151],"92%":[153],"respectively,":[154],"relative":[155],"strong":[158],"but":[159],"expensive":[161],"baseline.":[162]},"counts_by_year":[{"year":2025,"cited_by_count":3},{"year":2024,"cited_by_count":2}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
