{"id":"https://openalex.org/W4375869369","doi":"https://doi.org/10.1109/icassp49357.2023.10097012","title":"TrimTail: Low-Latency Streaming ASR with Simple But Effective Spectrogram-Level Length Penalty","display_name":"TrimTail: Low-Latency Streaming ASR with Simple But Effective Spectrogram-Level Length Penalty","publication_year":2023,"publication_date":"2023-05-05","ids":{"openalex":"https://openalex.org/W4375869369","doi":"https://doi.org/10.1109/icassp49357.2023.10097012"},"language":"en","primary_location":{"id":"doi:10.1109/icassp49357.2023.10097012","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp49357.2023.10097012","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2023 - 2023 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5074833963","display_name":"Xingchen Song","orcid":"https://orcid.org/0009-0009-9516-5361"},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Xingchen Song","raw_affiliation_strings":["Tsinghua Univ.,Beijing,China","WeNet Open Source Community","Horizon Inc., Beijing, China","Tsinghua Univ., Beijing, China"],"affiliations":[{"raw_affiliation_string":"Tsinghua Univ.,Beijing,China","institution_ids":["https://openalex.org/I99065089"]},{"raw_affiliation_string":"WeNet Open Source Community","institution_ids":[]},{"raw_affiliation_string":"Horizon Inc., Beijing, China","institution_ids":[]},{"raw_affiliation_string":"Tsinghua Univ., Beijing, China","institution_ids":["https://openalex.org/I99065089"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100599890","display_name":"Di Wu","orcid":"https://orcid.org/0000-0002-4753-8161"},"institutions":[{"id":"https://openalex.org/I4210090142","display_name":"Horizon Research (United States)","ror":"https://ror.org/00ekxed29","country_code":"US","type":"company","lineage":["https://openalex.org/I4210090142"]},{"id":"https://openalex.org/I4210152372","display_name":"Horizon Discovery Group (United States)","ror":"https://ror.org/0496m6d18","country_code":"US","type":"company","lineage":["https://openalex.org/I4210095126","https://openalex.org/I4210152372","https://openalex.org/I43591049","https://openalex.org/I4408895824"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Di Wu","raw_affiliation_strings":["Horizon Inc.,Beijing,China","Horizon Inc., Beijing, China","WeNet Open Source Community"],"affiliations":[{"raw_affiliation_string":"Horizon Inc.,Beijing,China","institution_ids":["https://openalex.org/I4210090142","https://openalex.org/I4210152372"]},{"raw_affiliation_string":"Horizon Inc., Beijing, China","institution_ids":[]},{"raw_affiliation_string":"WeNet Open Source Community","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100667025","display_name":"Zhiyong Wu","orcid":"https://orcid.org/0000-0002-6527-5502"},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Zhiyong Wu","raw_affiliation_strings":["Tsinghua Univ.,Beijing,China","Tsinghua Univ., Beijing, China"],"affiliations":[{"raw_affiliation_string":"Tsinghua Univ.,Beijing,China","institution_ids":["https://openalex.org/I99065089"]},{"raw_affiliation_string":"Tsinghua Univ., Beijing, China","institution_ids":["https://openalex.org/I99065089"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100449716","display_name":"Binbin Zhang","orcid":"https://orcid.org/0000-0002-7164-5127"},"institutions":[{"id":"https://openalex.org/I4210090142","display_name":"Horizon Research (United States)","ror":"https://ror.org/00ekxed29","country_code":"US","type":"company","lineage":["https://openalex.org/I4210090142"]},{"id":"https://openalex.org/I4210152372","display_name":"Horizon Discovery Group (United States)","ror":"https://ror.org/0496m6d18","country_code":"US","type":"company","lineage":["https://openalex.org/I4210095126","https://openalex.org/I4210152372","https://openalex.org/I43591049","https://openalex.org/I4408895824"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Binbin Zhang","raw_affiliation_strings":["Horizon Inc.,Beijing,China","WeNet Open Source Community","Horizon Inc., Beijing, China"],"affiliations":[{"raw_affiliation_string":"Horizon Inc.,Beijing,China","institution_ids":["https://openalex.org/I4210090142","https://openalex.org/I4210152372"]},{"raw_affiliation_string":"WeNet Open Source Community","institution_ids":[]},{"raw_affiliation_string":"Horizon Inc., Beijing, China","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5074415592","display_name":"Yuekai Zhang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yuekai Zhang","raw_affiliation_strings":["WeNet Open Source Community"],"affiliations":[{"raw_affiliation_string":"WeNet Open Source Community","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5041630666","display_name":"Zhendong Peng","orcid":null},"institutions":[{"id":"https://openalex.org/I4210090142","display_name":"Horizon Research (United States)","ror":"https://ror.org/00ekxed29","country_code":"US","type":"company","lineage":["https://openalex.org/I4210090142"]},{"id":"https://openalex.org/I4210152372","display_name":"Horizon Discovery Group (United States)","ror":"https://ror.org/0496m6d18","country_code":"US","type":"company","lineage":["https://openalex.org/I4210095126","https://openalex.org/I4210152372","https://openalex.org/I43591049","https://openalex.org/I4408895824"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Zhendong Peng","raw_affiliation_strings":["Horizon Inc.,Beijing,China","WeNet Open Source Community","Horizon Inc., Beijing, China"],"affiliations":[{"raw_affiliation_string":"Horizon Inc.,Beijing,China","institution_ids":["https://openalex.org/I4210090142","https://openalex.org/I4210152372"]},{"raw_affiliation_string":"WeNet Open Source Community","institution_ids":[]},{"raw_affiliation_string":"Horizon Inc., Beijing, China","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101547305","display_name":"Wenpeng Li","orcid":"https://orcid.org/0009-0009-5264-2973"},"institutions":[{"id":"https://openalex.org/I4210090142","display_name":"Horizon Research (United States)","ror":"https://ror.org/00ekxed29","country_code":"US","type":"company","lineage":["https://openalex.org/I4210090142"]},{"id":"https://openalex.org/I4210152372","display_name":"Horizon Discovery Group (United States)","ror":"https://ror.org/0496m6d18","country_code":"US","type":"company","lineage":["https://openalex.org/I4210095126","https://openalex.org/I4210152372","https://openalex.org/I43591049","https://openalex.org/I4408895824"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Wenpeng Li","raw_affiliation_strings":["Horizon Inc.,Beijing,China","Horizon Inc., Beijing, China"],"affiliations":[{"raw_affiliation_string":"Horizon Inc.,Beijing,China","institution_ids":["https://openalex.org/I4210090142","https://openalex.org/I4210152372"]},{"raw_affiliation_string":"Horizon Inc., Beijing, China","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5031150833","display_name":"Fuping Pan","orcid":"https://orcid.org/0000-0001-9171-0726"},"institutions":[{"id":"https://openalex.org/I4210152372","display_name":"Horizon Discovery Group (United States)","ror":"https://ror.org/0496m6d18","country_code":"US","type":"company","lineage":["https://openalex.org/I4210095126","https://openalex.org/I4210152372","https://openalex.org/I43591049","https://openalex.org/I4408895824"]},{"id":"https://openalex.org/I4210090142","display_name":"Horizon Research (United States)","ror":"https://ror.org/00ekxed29","country_code":"US","type":"company","lineage":["https://openalex.org/I4210090142"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Fuping Pan","raw_affiliation_strings":["Horizon Inc.,Beijing,China","Horizon Inc., Beijing, China"],"affiliations":[{"raw_affiliation_string":"Horizon Inc.,Beijing,China","institution_ids":["https://openalex.org/I4210090142","https://openalex.org/I4210152372"]},{"raw_affiliation_string":"Horizon Inc., Beijing, China","institution_ids":[]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5030243018","display_name":"Changbao Zhu","orcid":"https://orcid.org/0000-0002-4438-8102"},"institutions":[{"id":"https://openalex.org/I4210090142","display_name":"Horizon Research (United States)","ror":"https://ror.org/00ekxed29","country_code":"US","type":"company","lineage":["https://openalex.org/I4210090142"]},{"id":"https://openalex.org/I4210152372","display_name":"Horizon Discovery Group (United States)","ror":"https://ror.org/0496m6d18","country_code":"US","type":"company","lineage":["https://openalex.org/I4210095126","https://openalex.org/I4210152372","https://openalex.org/I43591049","https://openalex.org/I4408895824"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Changbao Zhu","raw_affiliation_strings":["Horizon Inc.,Beijing,China","Horizon Inc., Beijing, China"],"affiliations":[{"raw_affiliation_string":"Horizon Inc.,Beijing,China","institution_ids":["https://openalex.org/I4210090142","https://openalex.org/I4210152372"]},{"raw_affiliation_string":"Horizon Inc., Beijing, China","institution_ids":[]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":9,"corresponding_author_ids":["https://openalex.org/A5074833963"],"corresponding_institution_ids":["https://openalex.org/I99065089"],"apc_list":null,"apc_paid":null,"fwci":1.3642,"has_fulltext":false,"cited_by_count":8,"citation_normalized_percentile":{"value":0.84320768,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":94,"max":98},"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"5"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9959999918937683,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9959999918937683,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9919999837875366,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11667","display_name":"Advanced Chemical Sensor Technologies","score":0.9829000234603882,"subfield":{"id":"https://openalex.org/subfields/2204","display_name":"Biomedical Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/spectrogram","display_name":"Spectrogram","score":0.8289656043052673},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7781564593315125},{"id":"https://openalex.org/keywords/latency","display_name":"Latency (audio)","score":0.7390289902687073},{"id":"https://openalex.org/keywords/trimming","display_name":"Trimming","score":0.6001766920089722},{"id":"https://openalex.org/keywords/low-latency","display_name":"Low latency (capital markets)","score":0.5404767394065857},{"id":"https://openalex.org/keywords/regularization","display_name":"Regularization (linguistics)","score":0.45411989092826843},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.37627974152565},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.35781601071357727},{"id":"https://openalex.org/keywords/real-time-computing","display_name":"Real-time computing","score":0.3444826602935791},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.16887876391410828},{"id":"https://openalex.org/keywords/computer-network","display_name":"Computer network","score":0.13690122961997986},{"id":"https://openalex.org/keywords/telecommunications","display_name":"Telecommunications","score":0.1006021797657013}],"concepts":[{"id":"https://openalex.org/C45273575","wikidata":"https://www.wikidata.org/wiki/Q578970","display_name":"Spectrogram","level":2,"score":0.8289656043052673},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7781564593315125},{"id":"https://openalex.org/C82876162","wikidata":"https://www.wikidata.org/wiki/Q17096504","display_name":"Latency (audio)","level":2,"score":0.7390289902687073},{"id":"https://openalex.org/C56951928","wikidata":"https://www.wikidata.org/wiki/Q3539213","display_name":"Trimming","level":2,"score":0.6001766920089722},{"id":"https://openalex.org/C46637626","wikidata":"https://www.wikidata.org/wiki/Q6693015","display_name":"Low latency (capital markets)","level":2,"score":0.5404767394065857},{"id":"https://openalex.org/C2776135515","wikidata":"https://www.wikidata.org/wiki/Q17143721","display_name":"Regularization (linguistics)","level":2,"score":0.45411989092826843},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.37627974152565},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.35781601071357727},{"id":"https://openalex.org/C79403827","wikidata":"https://www.wikidata.org/wiki/Q3988","display_name":"Real-time computing","level":1,"score":0.3444826602935791},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.16887876391410828},{"id":"https://openalex.org/C31258907","wikidata":"https://www.wikidata.org/wiki/Q1301371","display_name":"Computer network","level":1,"score":0.13690122961997986},{"id":"https://openalex.org/C76155785","wikidata":"https://www.wikidata.org/wiki/Q418","display_name":"Telecommunications","level":1,"score":0.1006021797657013},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/icassp49357.2023.10097012","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp49357.2023.10097012","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2023 - 2023 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":19,"referenced_works":["https://openalex.org/W1494198834","https://openalex.org/W1828163288","https://openalex.org/W2127141656","https://openalex.org/W2327501763","https://openalex.org/W2892009249","https://openalex.org/W2963242190","https://openalex.org/W2963920996","https://openalex.org/W3015927303","https://openalex.org/W3097777922","https://openalex.org/W3163907627","https://openalex.org/W3170405627","https://openalex.org/W3196784225","https://openalex.org/W4221167707","https://openalex.org/W4306705025","https://openalex.org/W4385245566","https://openalex.org/W6638749077","https://openalex.org/W6739901393","https://openalex.org/W6797037654","https://openalex.org/W6845608264"],"related_works":["https://openalex.org/W2467235537","https://openalex.org/W3205411230","https://openalex.org/W4286899009","https://openalex.org/W9168048","https://openalex.org/W4300849822","https://openalex.org/W4376480820","https://openalex.org/W3155891479","https://openalex.org/W3029351463","https://openalex.org/W2885352820","https://openalex.org/W4296591952"],"abstract_inverted_index":{"In":[0],"this":[1],"paper,":[2],"we":[3,122],"present":[4],"TrimTail,":[5,121],"a":[6,125],"simple":[7],"but":[8],"effective":[9],"emission":[10],"regularization":[11],"method":[12],"to":[13,27],"improve":[14],"the":[15,41],"latency":[16,105],"of":[17,24,43,129,138],"streaming":[18,87],"ASR":[19,88],"models.":[20],"The":[21],"core":[22],"idea":[23],"TrimTail":[25,55],"is":[26,56],"apply":[28],"length":[29],"penalty":[30],"(i.e.,":[31],"by":[32,81,119],"trimming":[33],"trailing":[34],"frames,":[35],"see":[36],"Fig.":[37],"1-(b))":[38],"directly":[39],"on":[40,74,84,113],"spectrogram":[42],"input":[44],"utterances,":[45],"which":[46],"does":[47],"not":[48],"require":[49],"any":[50,67,71,75,78],"alignment.":[51],"We":[52,100],"demonstrate":[53],"that":[54],"computationally":[57],"cheap":[58],"and":[59,64,116],"can":[60,123],"be":[61],"applied":[62],"online":[63],"optimized":[65],"with":[66,92,107,134],"training":[68],"loss":[69,94,98,137],"or":[70,96,109],"model":[72],"architecture":[73],"dataset":[76],"without":[77],"extra":[79],"effort":[80],"applying":[82],"it":[83],"various":[85],"end-to-end":[86],"networks":[89],"either":[90],"trained":[91],"CTC":[93],"[1]":[95],"Transducer":[97],"[2].":[99],"achieve":[101,124],"100":[102],"~":[103],"200ms":[104],"reduction":[106],"equal":[108],"even":[110],"better":[111],"accuracy":[112,136],"both":[114],"Aishell-1":[115],"Librispeech.":[117],"Moreover,":[118],"using":[120],"400ms":[126],"algorithmic":[127],"improvement":[128],"User":[130],"Sensitive":[131],"Delay":[132],"(USD)":[133],"an":[135],"less":[139],"than":[140],"0.2.":[141]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":5},{"year":2024,"cited_by_count":2}],"updated_date":"2026-04-23T09:07:50.710637","created_date":"2025-10-10T00:00:00"}
