{"id":"https://openalex.org/W4319862444","doi":"https://doi.org/10.1109/slt54892.2023.10023291","title":"Conformer-Based on-Device Streaming Speech Recognition with KD Compression and Two-Pass Architecture","display_name":"Conformer-Based on-Device Streaming Speech Recognition with KD Compression and Two-Pass Architecture","publication_year":2023,"publication_date":"2023-01-09","ids":{"openalex":"https://openalex.org/W4319862444","doi":"https://doi.org/10.1109/slt54892.2023.10023291"},"language":"en","primary_location":{"id":"doi:10.1109/slt54892.2023.10023291","is_oa":false,"landing_page_url":"https://doi.org/10.1109/slt54892.2023.10023291","pdf_url":null,"source":{"id":"https://openalex.org/S4363605953","display_name":"2022 IEEE Spoken Language Technology Workshop (SLT)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2022 IEEE Spoken Language Technology Workshop (SLT)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5108758586","display_name":"Jin-Hwan Park","orcid":null},"institutions":[{"id":"https://openalex.org/I2250650973","display_name":"Samsung (South Korea)","ror":"https://ror.org/04w3jy968","country_code":"KR","type":"company","lineage":["https://openalex.org/I2250650973"]}],"countries":["KR"],"is_corresponding":true,"raw_author_name":"Jinhwan Park","raw_affiliation_strings":["Samsung Research,Seoul,South Korea","Samsung Research, Seoul, South Korea"],"affiliations":[{"raw_affiliation_string":"Samsung Research,Seoul,South Korea","institution_ids":["https://openalex.org/I2250650973"]},{"raw_affiliation_string":"Samsung Research, Seoul, South Korea","institution_ids":["https://openalex.org/I2250650973"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101747609","display_name":"Sichen Jin","orcid":"https://orcid.org/0000-0002-5591-7532"},"institutions":[{"id":"https://openalex.org/I2250650973","display_name":"Samsung (South Korea)","ror":"https://ror.org/04w3jy968","country_code":"KR","type":"company","lineage":["https://openalex.org/I2250650973"]}],"countries":["KR"],"is_corresponding":false,"raw_author_name":"Sichen Jin","raw_affiliation_strings":["Samsung Research,Seoul,South Korea","Samsung Research, Seoul, South Korea"],"affiliations":[{"raw_affiliation_string":"Samsung Research,Seoul,South Korea","institution_ids":["https://openalex.org/I2250650973"]},{"raw_affiliation_string":"Samsung Research, Seoul, South Korea","institution_ids":["https://openalex.org/I2250650973"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5029787807","display_name":"Jun\u2010Mo Park","orcid":"https://orcid.org/0000-0001-5997-0296"},"institutions":[{"id":"https://openalex.org/I2250650973","display_name":"Samsung (South Korea)","ror":"https://ror.org/04w3jy968","country_code":"KR","type":"company","lineage":["https://openalex.org/I2250650973"]}],"countries":["KR"],"is_corresponding":false,"raw_author_name":"Junmo Park","raw_affiliation_strings":["Samsung Research,Seoul,South Korea","Samsung Research, Seoul, South Korea"],"affiliations":[{"raw_affiliation_string":"Samsung Research,Seoul,South Korea","institution_ids":["https://openalex.org/I2250650973"]},{"raw_affiliation_string":"Samsung Research, Seoul, South Korea","institution_ids":["https://openalex.org/I2250650973"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100340036","display_name":"Sung-Soo Kim","orcid":"https://orcid.org/0000-0003-3207-4648"},"institutions":[{"id":"https://openalex.org/I2250650973","display_name":"Samsung (South Korea)","ror":"https://ror.org/04w3jy968","country_code":"KR","type":"company","lineage":["https://openalex.org/I2250650973"]}],"countries":["KR"],"is_corresponding":false,"raw_author_name":"Sungsoo Kim","raw_affiliation_strings":["Samsung Research,Seoul,South Korea","Samsung Research, Seoul, South Korea"],"affiliations":[{"raw_affiliation_string":"Samsung Research,Seoul,South Korea","institution_ids":["https://openalex.org/I2250650973"]},{"raw_affiliation_string":"Samsung Research, Seoul, South Korea","institution_ids":["https://openalex.org/I2250650973"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5005882133","display_name":"Dhairya Sandhyana","orcid":null},"institutions":[{"id":"https://openalex.org/I2250650973","display_name":"Samsung (South Korea)","ror":"https://ror.org/04w3jy968","country_code":"KR","type":"company","lineage":["https://openalex.org/I2250650973"]}],"countries":["KR"],"is_corresponding":false,"raw_author_name":"Dhairya Sandhyana","raw_affiliation_strings":["Samsung Research,Seoul,South Korea","Samsung Research, Seoul, South Korea"],"affiliations":[{"raw_affiliation_string":"Samsung Research,Seoul,South Korea","institution_ids":["https://openalex.org/I2250650973"]},{"raw_affiliation_string":"Samsung Research, Seoul, South Korea","institution_ids":["https://openalex.org/I2250650973"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5036381427","display_name":"Changheon Lee","orcid":null},"institutions":[{"id":"https://openalex.org/I2250650973","display_name":"Samsung (South Korea)","ror":"https://ror.org/04w3jy968","country_code":"KR","type":"company","lineage":["https://openalex.org/I2250650973"]}],"countries":["KR"],"is_corresponding":false,"raw_author_name":"Changheon Lee","raw_affiliation_strings":["Samsung Electronics,AI R&#x0026;D Group,Suwon,South Korea"],"affiliations":[{"raw_affiliation_string":"Samsung Electronics,AI R&#x0026;D Group,Suwon,South Korea","institution_ids":["https://openalex.org/I2250650973"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5085865703","display_name":"Myoungji Han","orcid":null},"institutions":[{"id":"https://openalex.org/I2250650973","display_name":"Samsung (South Korea)","ror":"https://ror.org/04w3jy968","country_code":"KR","type":"company","lineage":["https://openalex.org/I2250650973"]}],"countries":["KR"],"is_corresponding":false,"raw_author_name":"Myoungji Han","raw_affiliation_strings":["Samsung Electronics,AI R&#x0026;D Group,Suwon,South Korea"],"affiliations":[{"raw_affiliation_string":"Samsung Electronics,AI R&#x0026;D Group,Suwon,South Korea","institution_ids":["https://openalex.org/I2250650973"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5004902602","display_name":"Jungin Lee","orcid":"https://orcid.org/0000-0003-0390-119X"},"institutions":[{"id":"https://openalex.org/I2250650973","display_name":"Samsung (South Korea)","ror":"https://ror.org/04w3jy968","country_code":"KR","type":"company","lineage":["https://openalex.org/I2250650973"]}],"countries":["KR"],"is_corresponding":false,"raw_author_name":"Jungin Lee","raw_affiliation_strings":["Samsung Electronics,AI R&#x0026;D Group,Suwon,South Korea"],"affiliations":[{"raw_affiliation_string":"Samsung Electronics,AI R&#x0026;D Group,Suwon,South Korea","institution_ids":["https://openalex.org/I2250650973"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5057180394","display_name":"Seokyeong Jung","orcid":null},"institutions":[{"id":"https://openalex.org/I2250650973","display_name":"Samsung (South Korea)","ror":"https://ror.org/04w3jy968","country_code":"KR","type":"company","lineage":["https://openalex.org/I2250650973"]}],"countries":["KR"],"is_corresponding":false,"raw_author_name":"Seokyeong Jung","raw_affiliation_strings":["Samsung Electronics,AI R&#x0026;D Group,Suwon,South Korea"],"affiliations":[{"raw_affiliation_string":"Samsung Electronics,AI R&#x0026;D Group,Suwon,South Korea","institution_ids":["https://openalex.org/I2250650973"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101194806","display_name":"Changwoo Han","orcid":null},"institutions":[{"id":"https://openalex.org/I2250650973","display_name":"Samsung (South Korea)","ror":"https://ror.org/04w3jy968","country_code":"KR","type":"company","lineage":["https://openalex.org/I2250650973"]}],"countries":["KR"],"is_corresponding":false,"raw_author_name":"Changwoo Han","raw_affiliation_strings":["Samsung Research,Seoul,South Korea","Samsung Research, Seoul, South Korea"],"affiliations":[{"raw_affiliation_string":"Samsung Research,Seoul,South Korea","institution_ids":["https://openalex.org/I2250650973"]},{"raw_affiliation_string":"Samsung Research, Seoul, South Korea","institution_ids":["https://openalex.org/I2250650973"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5100684422","display_name":"Chanwoo Kim","orcid":"https://orcid.org/0000-0003-0193-8167"},"institutions":[{"id":"https://openalex.org/I2250650973","display_name":"Samsung (South Korea)","ror":"https://ror.org/04w3jy968","country_code":"KR","type":"company","lineage":["https://openalex.org/I2250650973"]}],"countries":["KR"],"is_corresponding":false,"raw_author_name":"Chanwoo Kim","raw_affiliation_strings":["Samsung Research,Seoul,South Korea","Samsung Research, Seoul, South Korea"],"affiliations":[{"raw_affiliation_string":"Samsung Research,Seoul,South Korea","institution_ids":["https://openalex.org/I2250650973"]},{"raw_affiliation_string":"Samsung Research, Seoul, South Korea","institution_ids":["https://openalex.org/I2250650973"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":11,"corresponding_author_ids":["https://openalex.org/A5108758586"],"corresponding_institution_ids":["https://openalex.org/I2250650973"],"apc_list":null,"apc_paid":null,"fwci":0.5652,"has_fulltext":false,"cited_by_count":5,"citation_normalized_percentile":{"value":0.59656429,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":96},"biblio":{"volume":null,"issue":null,"first_page":"92","last_page":"99"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9994000196456909,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9986000061035156,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8284636735916138},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.6536756753921509},{"id":"https://openalex.org/keywords/decoding-methods","display_name":"Decoding methods","score":0.5365239381790161},{"id":"https://openalex.org/keywords/language-model","display_name":"Language model","score":0.5092200040817261},{"id":"https://openalex.org/keywords/context","display_name":"Context (archaeology)","score":0.47653087973594666},{"id":"https://openalex.org/keywords/quantization","display_name":"Quantization (signal processing)","score":0.4631476104259491},{"id":"https://openalex.org/keywords/word-error-rate","display_name":"Word error rate","score":0.4613521099090576},{"id":"https://openalex.org/keywords/companding","display_name":"Companding","score":0.4228309094905853},{"id":"https://openalex.org/keywords/overhead","display_name":"Overhead (engineering)","score":0.41545575857162476},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.3755360543727875},{"id":"https://openalex.org/keywords/real-time-computing","display_name":"Real-time computing","score":0.3337818384170532},{"id":"https://openalex.org/keywords/computer-hardware","display_name":"Computer hardware","score":0.3217286169528961},{"id":"https://openalex.org/keywords/computer-vision","display_name":"Computer vision","score":0.2516060471534729},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.20136821269989014}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8284636735916138},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.6536756753921509},{"id":"https://openalex.org/C57273362","wikidata":"https://www.wikidata.org/wiki/Q576722","display_name":"Decoding methods","level":2,"score":0.5365239381790161},{"id":"https://openalex.org/C137293760","wikidata":"https://www.wikidata.org/wiki/Q3621696","display_name":"Language model","level":2,"score":0.5092200040817261},{"id":"https://openalex.org/C2779343474","wikidata":"https://www.wikidata.org/wiki/Q3109175","display_name":"Context (archaeology)","level":2,"score":0.47653087973594666},{"id":"https://openalex.org/C28855332","wikidata":"https://www.wikidata.org/wiki/Q198099","display_name":"Quantization (signal processing)","level":2,"score":0.4631476104259491},{"id":"https://openalex.org/C40969351","wikidata":"https://www.wikidata.org/wiki/Q3516228","display_name":"Word error rate","level":2,"score":0.4613521099090576},{"id":"https://openalex.org/C104250799","wikidata":"https://www.wikidata.org/wiki/Q1780765","display_name":"Companding","level":4,"score":0.4228309094905853},{"id":"https://openalex.org/C2779960059","wikidata":"https://www.wikidata.org/wiki/Q7113681","display_name":"Overhead (engineering)","level":2,"score":0.41545575857162476},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.3755360543727875},{"id":"https://openalex.org/C79403827","wikidata":"https://www.wikidata.org/wiki/Q3988","display_name":"Real-time computing","level":1,"score":0.3337818384170532},{"id":"https://openalex.org/C9390403","wikidata":"https://www.wikidata.org/wiki/Q3966","display_name":"Computer hardware","level":1,"score":0.3217286169528961},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.2516060471534729},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.20136821269989014},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0},{"id":"https://openalex.org/C31258907","wikidata":"https://www.wikidata.org/wiki/Q1301371","display_name":"Computer network","level":1,"score":0.0},{"id":"https://openalex.org/C151730666","wikidata":"https://www.wikidata.org/wiki/Q7205","display_name":"Paleontology","level":1,"score":0.0},{"id":"https://openalex.org/C127162648","wikidata":"https://www.wikidata.org/wiki/Q16858953","display_name":"Channel (broadcasting)","level":2,"score":0.0},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0},{"id":"https://openalex.org/C40409654","wikidata":"https://www.wikidata.org/wiki/Q375889","display_name":"Orthogonal frequency-division multiplexing","level":3,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/slt54892.2023.10023291","is_oa":false,"landing_page_url":"https://doi.org/10.1109/slt54892.2023.10023291","pdf_url":null,"source":{"id":"https://openalex.org/S4363605953","display_name":"2022 IEEE Spoken Language Technology Workshop (SLT)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2022 IEEE Spoken Language Technology Workshop (SLT)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"display_name":"Peace, Justice and strong institutions","id":"https://metadata.un.org/sdg/16","score":0.41999998688697815}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":39,"referenced_works":["https://openalex.org/W854541894","https://openalex.org/W1494198834","https://openalex.org/W1915251500","https://openalex.org/W2137269967","https://openalex.org/W2143612262","https://openalex.org/W2326699523","https://openalex.org/W2525778437","https://openalex.org/W2622203030","https://openalex.org/W2898997786","https://openalex.org/W2936774411","https://openalex.org/W2962760690","https://openalex.org/W2964110616","https://openalex.org/W2972630480","https://openalex.org/W2972816482","https://openalex.org/W2973122799","https://openalex.org/W3007528493","https://openalex.org/W3008181812","https://openalex.org/W3095311338","https://openalex.org/W3096104971","https://openalex.org/W3096758108","https://openalex.org/W3097777922","https://openalex.org/W3149509723","https://openalex.org/W3149629662","https://openalex.org/W3152100107","https://openalex.org/W3160766462","https://openalex.org/W3161022068","https://openalex.org/W3162649911","https://openalex.org/W3163203022","https://openalex.org/W3163793923","https://openalex.org/W3163842642","https://openalex.org/W3163907627","https://openalex.org/W4210435435","https://openalex.org/W4250482878","https://openalex.org/W4320930577","https://openalex.org/W6623517193","https://openalex.org/W6640059789","https://openalex.org/W6727690538","https://openalex.org/W6746023985","https://openalex.org/W6747158283"],"related_works":["https://openalex.org/W2100336732","https://openalex.org/W2367936931","https://openalex.org/W3080136773","https://openalex.org/W1826521293","https://openalex.org/W2188969719","https://openalex.org/W2131556841","https://openalex.org/W3005895185","https://openalex.org/W2100441082","https://openalex.org/W2996122240","https://openalex.org/W164170074"],"abstract_inverted_index":{"This":[0],"paper":[1],"introduces":[2],"a":[3,26,58,94,155],"two-pass":[4,150],"on-device":[5,127],"automatic":[6],"speech":[7],"recognition":[8],"(ASR)":[9],"system,":[10],"which":[11,137,191],"is":[12,23,87],"developed":[13,186],"for":[14,51,142],"commercialized":[15],"devices.":[16],"The":[17,83,124,145,185],"first":[18,47],"pass":[19],"of":[20,120],"the":[21,35,46,49,52,63,68,75,110,113,118,121,139,149,168,182,193],"system":[22,147,170,187,195],"based":[24],"on":[25,112,171,177],"causal":[27],"Conformer-transducer":[28],"model":[29,61,77,85,153,157],"to":[30,108],"generate":[31],"partial":[32],"results":[33,176],"from":[34,71,181],"input":[36,43],"audio":[37],"stream.":[38],"After":[39],"processing":[40],"an":[41],"entire":[42,169],"utterance":[44],"in":[45,62,98,161],"pass,":[48],"candidates":[50],"final":[53],"result":[54],"are":[55],"rescored":[56],"with":[57,93,117,196],"full-context":[59,76],"attention":[60],"second":[64],"pass.":[65],"To":[66],"minimize":[67],"computational":[69],"overhead":[70],"rescoring,":[72],"we":[73],"compress":[74],"by":[78,89],"applying":[79],"knowledge":[80],"distillation":[81],"(KD).":[82],"total":[84],"size":[86],"reduced":[88],"35%":[90],"after":[91,163],"KD":[92],"0.02%":[95],"absolute":[96],"loss":[97],"word":[99],"error":[100],"rate":[101],"(WER).":[102],"We":[103,166],"also":[104],"introduce":[105],"decoding":[106],"techniques":[107,125],"boost":[109],"accuracy":[111],"test":[114,178],"cases":[115],"mismatched":[116],"distribution":[119],"training":[122],"set.":[123],"include":[126],"personal":[128],"adaptation,":[129],"spell":[130],"correction":[131],"and":[132,154,174],"handling":[133],"incorrectly":[134],"segmented":[135],"speech,":[136],"solve":[138],"critical":[140],"issues":[141],"production-grade":[143],"systems.":[144],"whole":[146],"including":[148],"end-to-end":[151],"(E2E)":[152],"language":[156],"(LM)":[158],"occupies":[159],"72MB":[160],"storage":[162],"8-bit":[164],"quantization.":[165],"demonstrate":[167],"mobile":[172],"devices":[173],"report":[175],"sets":[179],"collected":[180],"production":[183],"environment.":[184],"achieves":[188],"5.65%":[189],"WER":[190,199],"surpasses":[192],"baseline":[194],"39%":[197],"relative":[198],"improvement.":[200]},"counts_by_year":[{"year":2025,"cited_by_count":2},{"year":2024,"cited_by_count":2},{"year":2023,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
