{"id":"https://openalex.org/W4391021509","doi":"https://doi.org/10.1109/asru57964.2023.10389667","title":"Knowledge Distillation From Offline to Streaming Transducer: Towards Accurate and Fast Streaming Model by Matching Alignments","display_name":"Knowledge Distillation From Offline to Streaming Transducer: Towards Accurate and Fast Streaming Model by Matching Alignments","publication_year":2023,"publication_date":"2023-12-16","ids":{"openalex":"https://openalex.org/W4391021509","doi":"https://doi.org/10.1109/asru57964.2023.10389667"},"language":"en","primary_location":{"id":"doi:10.1109/asru57964.2023.10389667","is_oa":false,"landing_page_url":"http://dx.doi.org/10.1109/asru57964.2023.10389667","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2023 IEEE Automatic Speech Recognition and Understanding Workshop (ASRU)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5048605725","display_name":"Ji-Hwan Mo","orcid":null},"institutions":[{"id":"https://openalex.org/I4575257","display_name":"Hanyang University","ror":"https://ror.org/046865y68","country_code":"KR","type":"education","lineage":["https://openalex.org/I4575257"]}],"countries":["KR"],"is_corresponding":false,"raw_author_name":"Ji-Hwan Mo","raw_affiliation_strings":["Hanyang University,Department of Electronic Engineering,Seoul,Republic of Korea","Department of Electronic Engineering, Hanyang University, Seoul, Republic of Korea","Kakao Enterprise Corporation, Seongnam, Republic of Korea"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Hanyang University,Department of Electronic Engineering,Seoul,Republic of Korea","institution_ids":["https://openalex.org/I4575257"]},{"raw_affiliation_string":"Department of Electronic Engineering, Hanyang University, Seoul, Republic of Korea","institution_ids":["https://openalex.org/I4575257"]},{"raw_affiliation_string":"Kakao Enterprise Corporation, Seongnam, Republic of Korea","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5034040166","display_name":"Jae-Jin Jeon","orcid":null},"institutions":[{"id":"https://openalex.org/I4210149944","display_name":"Korea Telecom (South Korea)","ror":null,"country_code":"KR","type":null,"lineage":["https://openalex.org/I4210149944"]}],"countries":["KR"],"is_corresponding":false,"raw_author_name":"Jae-Jin Jeon","raw_affiliation_strings":["Kakao Enterprise Corporation,Seongnam,Republic of Korea","Kakao Enterprise Corporation, Seongnam, Republic of Korea"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Kakao Enterprise Corporation,Seongnam,Republic of Korea","institution_ids":["https://openalex.org/I4210149944"]},{"raw_affiliation_string":"Kakao Enterprise Corporation, Seongnam, Republic of Korea","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5037434075","display_name":"Mun-Hak Lee","orcid":"https://orcid.org/0009-0005-7676-2532"},"institutions":[{"id":"https://openalex.org/I4575257","display_name":"Hanyang University","ror":"https://ror.org/046865y68","country_code":"KR","type":"education","lineage":["https://openalex.org/I4575257"]}],"countries":["KR"],"is_corresponding":false,"raw_author_name":"Mun-Hak Lee","raw_affiliation_strings":["Hanyang University,Department of Electronic Engineering,Seoul,Republic of Korea","Department of Electronic Engineering, Hanyang University, Seoul, Republic of Korea"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Hanyang University,Department of Electronic Engineering,Seoul,Republic of Korea","institution_ids":["https://openalex.org/I4575257"]},{"raw_affiliation_string":"Department of Electronic Engineering, Hanyang University, Seoul, Republic of Korea","institution_ids":["https://openalex.org/I4575257"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5002418613","display_name":"Joon\u2010Hyuk Chang","orcid":"https://orcid.org/0000-0003-2610-2323"},"institutions":[{"id":"https://openalex.org/I4575257","display_name":"Hanyang University","ror":"https://ror.org/046865y68","country_code":"KR","type":"education","lineage":["https://openalex.org/I4575257"]}],"countries":["KR"],"is_corresponding":false,"raw_author_name":"Joon-Hyuk Chang","raw_affiliation_strings":["Hanyang University,Department of Electronic Engineering,Seoul,Republic of Korea","Department of Electronic Engineering, Hanyang University, Seoul, Republic of Korea"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Hanyang University,Department of Electronic Engineering,Seoul,Republic of Korea","institution_ids":["https://openalex.org/I4575257"]},{"raw_affiliation_string":"Department of Electronic Engineering, Hanyang University, Seoul, Republic of Korea","institution_ids":["https://openalex.org/I4575257"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.1632,"has_fulltext":false,"cited_by_count":1,"citation_normalized_percentile":{"value":0.59479212,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":95,"max":98},"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"7"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9990000128746033,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9990000128746033,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/latency","display_name":"Latency (audio)","score":0.8499683737754822},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7659978866577148},{"id":"https://openalex.org/keywords/real-time-computing","display_name":"Real-time computing","score":0.4655070900917053},{"id":"https://openalex.org/keywords/telecommunications","display_name":"Telecommunications","score":0.09277138113975525}],"concepts":[{"id":"https://openalex.org/C82876162","wikidata":"https://www.wikidata.org/wiki/Q17096504","display_name":"Latency (audio)","level":2,"score":0.8499683737754822},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7659978866577148},{"id":"https://openalex.org/C79403827","wikidata":"https://www.wikidata.org/wiki/Q3988","display_name":"Real-time computing","level":1,"score":0.4655070900917053},{"id":"https://openalex.org/C76155785","wikidata":"https://www.wikidata.org/wiki/Q418","display_name":"Telecommunications","level":1,"score":0.09277138113975525}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/asru57964.2023.10389667","is_oa":false,"landing_page_url":"http://dx.doi.org/10.1109/asru57964.2023.10389667","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2023 IEEE Automatic Speech Recognition and Understanding Workshop (ASRU)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":28,"referenced_works":["https://openalex.org/W108866686","https://openalex.org/W2122364000","https://openalex.org/W2913178639","https://openalex.org/W2936774411","https://openalex.org/W2962760690","https://openalex.org/W2962780374","https://openalex.org/W3007433671","https://openalex.org/W3015194534","https://openalex.org/W3015927303","https://openalex.org/W3016010032","https://openalex.org/W3092970820","https://openalex.org/W3095173472","https://openalex.org/W3095687747","https://openalex.org/W3096518646","https://openalex.org/W3097777922","https://openalex.org/W3149509723","https://openalex.org/W3160628828","https://openalex.org/W3160766462","https://openalex.org/W3161873870","https://openalex.org/W3162649911","https://openalex.org/W3163907627","https://openalex.org/W3196784225","https://openalex.org/W3203453034","https://openalex.org/W4224916448","https://openalex.org/W6631362777","https://openalex.org/W6638523607","https://openalex.org/W6638749077","https://openalex.org/W6784721964"],"related_works":["https://openalex.org/W4391375266","https://openalex.org/W2748952813","https://openalex.org/W2390279801","https://openalex.org/W2358668433","https://openalex.org/W4396701345","https://openalex.org/W2376932109","https://openalex.org/W2001405890","https://openalex.org/W4396696052","https://openalex.org/W2954284861","https://openalex.org/W3036465205"],"abstract_inverted_index":{"Sequence":[0],"transducer":[1],"is":[2,15,49,57],"a":[3,16,133],"popular":[4],"end-to-end":[5],"automatic":[6],"speech":[7],"recognition":[8],"model":[9,83,123],"for":[10],"streaming":[11,93,112,150],"scenarios:":[12],"While,":[13],"there":[14],"trade-off":[17,127],"between":[18,90,128],"accuracy":[19,41,80,129],"and":[20,55,77,92,111,130,140],"latency.":[21],"Latency":[22],"regularization":[23],"methods":[24],"such":[25],"as":[26],"FastEmit":[27,70,106],"can":[28],"reduce":[29,37,75,99],"latency,":[30,38],"but":[31],"the":[32,39,72,79,87,100,109,117,122,125,142,149],"more":[33],"they":[34],"try":[35],"to":[36,43,52,74,107],"worse":[40],"tends":[42],"be.":[44],"Conversely,":[45],"knowledge":[46],"distillation":[47],"(KD)":[48],"only":[50],"used":[51],"improve":[53,78],"accuracy,":[54],"latency":[56,76,88,101,131,143],"not":[58],"considered.":[59],"In":[60],"this":[61],"paper,":[62],"we":[63],"propose":[64],"an":[65],"effective":[66],"method":[67,98],"that":[68,121],"combines":[69],"with":[71,105,124,148],"KD":[73],"of":[81,138],"offline":[82,91,110],"in":[84],"scenarios":[85],"where":[86],"gap":[89,102],"models":[94],"gets":[95],"small.":[96],"This":[97],"by":[103,144],"applying":[104],"both":[108],"models.":[113],"Experimental":[114],"results":[115],"on":[116],"LibriSpeech":[118],"dataset":[119],"show":[120],"best":[126],"achieves":[132],"relative":[134],"error":[135],"reduction":[136],"rate":[137],"7.5%":[139],"reduces":[141],"$130":[145],"\\mathrm{~ms}$":[146],"compared":[147],"conformer":[151],"transducer.":[152]},"counts_by_year":[{"year":2026,"cited_by_count":1}],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2025-10-10T00:00:00"}
