{"id":"https://openalex.org/W4405709553","doi":"https://doi.org/10.1109/iscslp63861.2024.10800614","title":"CUSIDE-T: Chunking, Simulating Future and Decoding for Transducer Based Streaming ASR","display_name":"CUSIDE-T: Chunking, Simulating Future and Decoding for Transducer Based Streaming ASR","publication_year":2024,"publication_date":"2024-11-07","ids":{"openalex":"https://openalex.org/W4405709553","doi":"https://doi.org/10.1109/iscslp63861.2024.10800614"},"language":"en","primary_location":{"id":"doi:10.1109/iscslp63861.2024.10800614","is_oa":false,"landing_page_url":"https://doi.org/10.1109/iscslp63861.2024.10800614","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2024 IEEE 14th International Symposium on Chinese Spoken Language Processing (ISCSLP)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5101689698","display_name":"Wenbo Zhao","orcid":"https://orcid.org/0000-0002-5517-1304"},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Wenbo Zhao","raw_affiliation_strings":["China Unicom (Guangdong) Industrial Internet Co., Ltd"],"affiliations":[{"raw_affiliation_string":"China Unicom (Guangdong) Industrial Internet Co., Ltd","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5105762673","display_name":"Z.G. Li","orcid":"https://orcid.org/0009-0003-6148-2680"},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Ziwei Li","raw_affiliation_strings":["Tsinghua University,Speech Processing and Machine Intelligence (SPMI) Lab"],"affiliations":[{"raw_affiliation_string":"Tsinghua University,Speech Processing and Machine Intelligence (SPMI) Lab","institution_ids":["https://openalex.org/I99065089"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5059741436","display_name":"Chuan Yu","orcid":"https://orcid.org/0000-0003-3748-8663"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Chuan Yu","raw_affiliation_strings":["China Unicom (Guangdong) Industrial Internet Co., Ltd"],"affiliations":[{"raw_affiliation_string":"China Unicom (Guangdong) Industrial Internet Co., Ltd","institution_ids":[]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5010173604","display_name":"Zhijian Ou","orcid":"https://orcid.org/0000-0002-9018-5074"},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Zhijian Ou","raw_affiliation_strings":["Tsinghua University,Speech Processing and Machine Intelligence (SPMI) Lab"],"affiliations":[{"raw_affiliation_string":"Tsinghua University,Speech Processing and Machine Intelligence (SPMI) Lab","institution_ids":["https://openalex.org/I99065089"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5101689698"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":1.0484,"has_fulltext":false,"cited_by_count":3,"citation_normalized_percentile":{"value":0.78859913,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":96,"max":97},"biblio":{"volume":null,"issue":null,"first_page":"11","last_page":"15"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10876","display_name":"Fault Detection and Control Systems","score":0.9728999733924866,"subfield":{"id":"https://openalex.org/subfields/2207","display_name":"Control and Systems Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10876","display_name":"Fault Detection and Control Systems","score":0.9728999733924866,"subfield":{"id":"https://openalex.org/subfields/2207","display_name":"Control and Systems Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/decoding-methods","display_name":"Decoding methods","score":0.8317914605140686},{"id":"https://openalex.org/keywords/chunking","display_name":"Chunking (psychology)","score":0.8140862584114075},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7775119543075562},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.178139328956604},{"id":"https://openalex.org/keywords/telecommunications","display_name":"Telecommunications","score":0.09556636214256287}],"concepts":[{"id":"https://openalex.org/C57273362","wikidata":"https://www.wikidata.org/wiki/Q576722","display_name":"Decoding methods","level":2,"score":0.8317914605140686},{"id":"https://openalex.org/C203357204","wikidata":"https://www.wikidata.org/wiki/Q1089605","display_name":"Chunking (psychology)","level":2,"score":0.8140862584114075},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7775119543075562},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.178139328956604},{"id":"https://openalex.org/C76155785","wikidata":"https://www.wikidata.org/wiki/Q418","display_name":"Telecommunications","level":1,"score":0.09556636214256287}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/iscslp63861.2024.10800614","is_oa":false,"landing_page_url":"https://doi.org/10.1109/iscslp63861.2024.10800614","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2024 IEEE 14th International Symposium on Chinese Spoken Language Processing (ISCSLP)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":28,"referenced_works":["https://openalex.org/W179875071","https://openalex.org/W2064675550","https://openalex.org/W2127141656","https://openalex.org/W2327501763","https://openalex.org/W2766219058","https://openalex.org/W2913718171","https://openalex.org/W2936774411","https://openalex.org/W2939297570","https://openalex.org/W2963242190","https://openalex.org/W3008174054","https://openalex.org/W3015457435","https://openalex.org/W3096396467","https://openalex.org/W3097777922","https://openalex.org/W3163203022","https://openalex.org/W3198492054","https://openalex.org/W3203407300","https://openalex.org/W4296070361","https://openalex.org/W4297841336","https://openalex.org/W4385245566","https://openalex.org/W6623517193","https://openalex.org/W6635078382","https://openalex.org/W6638749077","https://openalex.org/W6640090968","https://openalex.org/W6679855610","https://openalex.org/W6747158283","https://openalex.org/W6787040858","https://openalex.org/W6790121257","https://openalex.org/W6797037654"],"related_works":["https://openalex.org/W4391375266","https://openalex.org/W2899084033","https://openalex.org/W2748952813","https://openalex.org/W2384729545","https://openalex.org/W2198395236","https://openalex.org/W2800417007","https://openalex.org/W147604216","https://openalex.org/W2161080928","https://openalex.org/W2118525872","https://openalex.org/W2279104074"],"abstract_inverted_index":{"Streaming":[0],"automatic":[1],"speech":[2],"recognition":[3,62],"(ASR)":[4],"is":[5,132,143],"very":[6],"important":[7],"for":[8,17,44,151],"many":[9],"real-world":[10],"ASR":[11,19,83,140],"applications.":[12],"However,":[13],"a":[14,30,54,108],"notable":[15],"challenge":[16],"streaming":[18,50,139,152],"systems":[20],"lies":[21],"in":[22,99],"balancing":[23],"operational":[24],"performance":[25,150],"against":[26],"latency":[27,59],"constraint.":[28],"Recently,":[29],"method":[31,75],"of":[32,86,136,157],"chunking,":[33],"simulating":[34],"future":[35],"context":[36],"and":[37,60,120,125],"decoding,":[38],"called":[39],"CUSIDE,":[40],"has":[41],"been":[42],"proposed":[43],"connectionist":[45],"temporal":[46],"classification":[47],"(CTC)":[48],"based":[49,88,128,138],"ASR,":[51,153],"which":[52,70],"obtains":[53],"good":[55],"balance":[56],"between":[57],"reduced":[58],"high":[61],"accuracy.":[63],"In":[64],"this":[65],"paper,":[66],"we":[67],"present":[68],"CUSIDE-T,":[69],"successfully":[71],"adapts":[72],"the":[73,77,90,117],"CUSIDE":[74],"over":[76,116],"recurrent":[78],"neural":[79],"network":[80],"transducer":[81],"(RNN-T)":[82],"architecture,":[84],"instead":[85],"being":[87],"on":[89,129],"CTC":[91],"architecture.":[92],"We":[93],"also":[94],"incorporate":[95],"language":[96],"model":[97],"rescoring":[98],"CUSIDE-T":[100,124,146],"to":[101],"further":[102],"enhance":[103],"accuracy,":[104],"while":[105],"only":[106],"bringing":[107],"small":[109],"additional":[110],"latency.":[111,158],"Extensive":[112],"experiments":[113],"are":[114],"conducted":[115],"AISHELL-l,":[118],"WenetSpeech":[119],"SpeechIO":[121],"datasets,":[122],"comparing":[123],"U2++":[126,131],"(both":[127],"RNN-T).":[130],"an":[133],"existing":[134],"counterpart":[135],"chunk":[137],"method.":[141],"It":[142],"shown":[144],"that":[145],"achieves":[147],"superior":[148],"accuracy":[149],"with":[154],"equal":[155],"settings":[156]},"counts_by_year":[{"year":2025,"cited_by_count":3}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
