{"id":"https://openalex.org/W4372346432","doi":"https://doi.org/10.1109/icassp49357.2023.10097062","title":"Dynamic Chunk Convolution for Unified Streaming and Non-Streaming Conformer ASR","display_name":"Dynamic Chunk Convolution for Unified Streaming and Non-Streaming Conformer ASR","publication_year":2023,"publication_date":"2023-05-05","ids":{"openalex":"https://openalex.org/W4372346432","doi":"https://doi.org/10.1109/icassp49357.2023.10097062"},"language":"en","primary_location":{"id":"doi:10.1109/icassp49357.2023.10097062","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp49357.2023.10097062","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2023 - 2023 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5068555152","display_name":"Xilai Li","orcid":"https://orcid.org/0000-0001-9171-2481"},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Xilai Li","raw_affiliation_strings":["AWS AI Labs"],"affiliations":[{"raw_affiliation_string":"AWS AI Labs","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5058343897","display_name":"Goeric Huybrechts","orcid":"https://orcid.org/0000-0003-0222-3008"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Goeric Huybrechts","raw_affiliation_strings":["AWS AI Labs"],"affiliations":[{"raw_affiliation_string":"AWS AI Labs","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5004616142","display_name":"Srikanth Ronanki","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Srikanth Ronanki","raw_affiliation_strings":["AWS AI Labs"],"affiliations":[{"raw_affiliation_string":"AWS AI Labs","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5027803608","display_name":"Jeff Farris","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Jeff Farris","raw_affiliation_strings":["AWS AI Labs"],"affiliations":[{"raw_affiliation_string":"AWS AI Labs","institution_ids":[]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5050552820","display_name":"Sravan Bodapati","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Sravan Bodapati","raw_affiliation_strings":["AWS AI Labs"],"affiliations":[{"raw_affiliation_string":"AWS AI Labs","institution_ids":[]}]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5068555152"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":1.1937,"has_fulltext":false,"cited_by_count":7,"citation_normalized_percentile":{"value":0.82589951,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":98},"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"5"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9990000128746033,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9966999888420105,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8695571422576904},{"id":"https://openalex.org/keywords/convolution","display_name":"Convolution (computer science)","score":0.6570460200309753},{"id":"https://openalex.org/keywords/initialization","display_name":"Initialization","score":0.6057448387145996},{"id":"https://openalex.org/keywords/connectionism","display_name":"Connectionism","score":0.5992323160171509},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.44863033294677734},{"id":"https://openalex.org/keywords/degradation","display_name":"Degradation (telecommunications)","score":0.4287605285644531},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.3522694706916809},{"id":"https://openalex.org/keywords/artificial-neural-network","display_name":"Artificial neural network","score":0.1673145890235901}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8695571422576904},{"id":"https://openalex.org/C45347329","wikidata":"https://www.wikidata.org/wiki/Q5166604","display_name":"Convolution (computer science)","level":3,"score":0.6570460200309753},{"id":"https://openalex.org/C114466953","wikidata":"https://www.wikidata.org/wiki/Q6034165","display_name":"Initialization","level":2,"score":0.6057448387145996},{"id":"https://openalex.org/C8521452","wikidata":"https://www.wikidata.org/wiki/Q203790","display_name":"Connectionism","level":3,"score":0.5992323160171509},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.44863033294677734},{"id":"https://openalex.org/C2779679103","wikidata":"https://www.wikidata.org/wiki/Q5251805","display_name":"Degradation (telecommunications)","level":2,"score":0.4287605285644531},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.3522694706916809},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.1673145890235901},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0},{"id":"https://openalex.org/C76155785","wikidata":"https://www.wikidata.org/wiki/Q418","display_name":"Telecommunications","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/icassp49357.2023.10097062","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp49357.2023.10097062","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2023 - 2023 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":41,"referenced_works":["https://openalex.org/W854541894","https://openalex.org/W1494198834","https://openalex.org/W1522301498","https://openalex.org/W2127141656","https://openalex.org/W2143612262","https://openalex.org/W2327501763","https://openalex.org/W2526425061","https://openalex.org/W2936123380","https://openalex.org/W2936774411","https://openalex.org/W2962780374","https://openalex.org/W2973122799","https://openalex.org/W3016010032","https://openalex.org/W3036601975","https://openalex.org/W3092122846","https://openalex.org/W3096686110","https://openalex.org/W3097777922","https://openalex.org/W3111562797","https://openalex.org/W3119308075","https://openalex.org/W3125815078","https://openalex.org/W3160201895","https://openalex.org/W3162665866","https://openalex.org/W3163203022","https://openalex.org/W3197478142","https://openalex.org/W3197654132","https://openalex.org/W3198492054","https://openalex.org/W3207629995","https://openalex.org/W4294619417","https://openalex.org/W4297727296","https://openalex.org/W4319862261","https://openalex.org/W4319862425","https://openalex.org/W6623517193","https://openalex.org/W6631190155","https://openalex.org/W6631362777","https://openalex.org/W6687566353","https://openalex.org/W6747158283","https://openalex.org/W6780218876","https://openalex.org/W6784400248","https://openalex.org/W6787040858","https://openalex.org/W6790121257","https://openalex.org/W6839026989","https://openalex.org/W6840724095"],"related_works":["https://openalex.org/W3204184292","https://openalex.org/W4205841273","https://openalex.org/W4205525690","https://openalex.org/W3176564347","https://openalex.org/W2355833770","https://openalex.org/W1761388607","https://openalex.org/W1985458517","https://openalex.org/W1997922073","https://openalex.org/W3031039437","https://openalex.org/W183202219"],"abstract_inverted_index":{"Recently,":[0],"there":[1],"has":[2],"been":[3],"an":[4],"increasing":[5],"interest":[6],"in":[7,73],"unifying":[8],"streaming":[9,125],"and":[10,19,34,54,95,100,112,134,138,144],"non-streaming":[11,53,129],"speech":[12],"recognition":[13],"models":[14,106],"to":[15,37,42,136],"reduce":[16],"development,":[17],"training":[18],"deployment":[20],"cost.":[21],"The":[22],"best-known":[23],"approaches":[24],"rely":[25],"on":[26,107,140],"either":[27],"window-based":[28],"or":[29],"dynamic":[30,66],"chunk-based":[31,67],"attention":[32],"strategy":[33],"causal":[35,71],"convolutions":[36],"minimize":[38],"the":[39,45,70,98,108,121,124,128,141,156],"degradation":[40,122],"due":[41],"streaming.":[43],"However,":[44],"performance":[46],"gap":[47],"still":[48],"remains":[49],"relatively":[50],"large":[51],"between":[52],"a":[55,65,74,92,151],"full-contextual":[56,93,130],"model":[57,94,119,131],"trained":[58],"independently.":[59],"To":[60],"address":[61],"this,":[62],"we":[63,83],"propose":[64],"convolution":[68,72,99],"replacing":[69],"hybrid":[75],"Connectionist":[76],"Temporal":[77],"Classification":[78],"(CTC)-Attention":[79],"Conformer":[80],"architecture.":[81],"Additionally,":[82],"demonstrate":[84],"further":[85],"improvements":[86],"through":[87],"initialization":[88],"of":[89,97,123],"weights":[90],"from":[91,132],"parallelization":[96],"self-attention":[101],"modules.":[102],"We":[103],"evaluate":[104],"our":[105,117],"open-source":[109],"Voxpopuli,":[110],"LibriSpeech":[111,142],"in-house":[113],"conversational":[114],"datasets.":[115],"Overall,":[116],"proposed":[118],"reduces":[120],"mode":[126],"over":[127,155],"41.7%":[133],"45.7%":[135],"16.7%":[137],"26.2%":[139],"test-clean":[143],"test-other":[145],"datasets":[146],"respectively,":[147],"while":[148],"improving":[149],"by":[150],"relative":[152],"15.5%":[153],"WER":[154],"previous":[157],"state-of-the-art":[158],"unified":[159],"model.":[160]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":5},{"year":2024,"cited_by_count":1}],"updated_date":"2026-04-23T09:07:50.710637","created_date":"2025-10-10T00:00:00"}
