{"id":"https://openalex.org/W4319862668","doi":"https://doi.org/10.1109/slt54892.2023.10022938","title":"Internal Language Model Personalization of E2E Automatic Speech Recognition Using Random Encoder Features","display_name":"Internal Language Model Personalization of E2E Automatic Speech Recognition Using Random Encoder Features","publication_year":2023,"publication_date":"2023-01-09","ids":{"openalex":"https://openalex.org/W4319862668","doi":"https://doi.org/10.1109/slt54892.2023.10022938"},"language":"en","primary_location":{"id":"doi:10.1109/slt54892.2023.10022938","is_oa":false,"landing_page_url":"http://dx.doi.org/10.1109/slt54892.2023.10022938","pdf_url":null,"source":{"id":"https://openalex.org/S4363605953","display_name":"2022 IEEE Spoken Language Technology Workshop (SLT)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2022 IEEE Spoken Language Technology Workshop (SLT)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5008253072","display_name":"Adam Stooke","orcid":null},"institutions":[{"id":"https://openalex.org/I1291425158","display_name":"Google (United States)","ror":"https://ror.org/00njsd438","country_code":"US","type":"company","lineage":["https://openalex.org/I1291425158","https://openalex.org/I4210128969"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Adam Stooke","raw_affiliation_strings":["Google LLC,USA","Google LLC, USA"],"affiliations":[{"raw_affiliation_string":"Google LLC,USA","institution_ids":["https://openalex.org/I1291425158"]},{"raw_affiliation_string":"Google LLC, USA","institution_ids":["https://openalex.org/I1291425158"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5032798707","display_name":"Khe Chai Sim","orcid":"https://orcid.org/0000-0002-0866-2223"},"institutions":[{"id":"https://openalex.org/I1291425158","display_name":"Google (United States)","ror":"https://ror.org/00njsd438","country_code":"US","type":"company","lineage":["https://openalex.org/I1291425158","https://openalex.org/I4210128969"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Khe Chai Sim","raw_affiliation_strings":["Google LLC,USA","Google LLC, USA"],"affiliations":[{"raw_affiliation_string":"Google LLC,USA","institution_ids":["https://openalex.org/I1291425158"]},{"raw_affiliation_string":"Google LLC, USA","institution_ids":["https://openalex.org/I1291425158"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5114042018","display_name":"M. Chua","orcid":"https://orcid.org/0009-0009-2974-4509"},"institutions":[{"id":"https://openalex.org/I1291425158","display_name":"Google (United States)","ror":"https://ror.org/00njsd438","country_code":"US","type":"company","lineage":["https://openalex.org/I1291425158","https://openalex.org/I4210128969"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Mason Chua","raw_affiliation_strings":["Google LLC,USA","Google LLC, USA"],"affiliations":[{"raw_affiliation_string":"Google LLC,USA","institution_ids":["https://openalex.org/I1291425158"]},{"raw_affiliation_string":"Google LLC, USA","institution_ids":["https://openalex.org/I1291425158"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5062473985","display_name":"Tsendsuren Munkhdalai","orcid":"https://orcid.org/0000-0002-8783-4993"},"institutions":[{"id":"https://openalex.org/I1291425158","display_name":"Google (United States)","ror":"https://ror.org/00njsd438","country_code":"US","type":"company","lineage":["https://openalex.org/I1291425158","https://openalex.org/I4210128969"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Tsendsuren Munkhdalai","raw_affiliation_strings":["Google LLC,USA","Google LLC, USA"],"affiliations":[{"raw_affiliation_string":"Google LLC,USA","institution_ids":["https://openalex.org/I1291425158"]},{"raw_affiliation_string":"Google LLC, USA","institution_ids":["https://openalex.org/I1291425158"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5032931723","display_name":"Trevor Strohman","orcid":null},"institutions":[{"id":"https://openalex.org/I1291425158","display_name":"Google (United States)","ror":"https://ror.org/00njsd438","country_code":"US","type":"company","lineage":["https://openalex.org/I1291425158","https://openalex.org/I4210128969"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Trevor Strohman","raw_affiliation_strings":["Google LLC,USA","Google LLC, USA"],"affiliations":[{"raw_affiliation_string":"Google LLC,USA","institution_ids":["https://openalex.org/I1291425158"]},{"raw_affiliation_string":"Google LLC, USA","institution_ids":["https://openalex.org/I1291425158"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5008253072"],"corresponding_institution_ids":["https://openalex.org/I1291425158"],"apc_list":null,"apc_paid":null,"fwci":0.3396,"has_fulltext":false,"cited_by_count":3,"citation_normalized_percentile":{"value":0.45445081,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":96,"max":97},"biblio":{"volume":"abs/2012.00133","issue":null,"first_page":"213","last_page":"220"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9979000091552734,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9973999857902527,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8359239101409912},{"id":"https://openalex.org/keywords/personalization","display_name":"Personalization","score":0.7492472529411316},{"id":"https://openalex.org/keywords/encoder","display_name":"Encoder","score":0.7057939171791077},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.6861536502838135},{"id":"https://openalex.org/keywords/language-model","display_name":"Language model","score":0.6570391654968262},{"id":"https://openalex.org/keywords/vocabulary","display_name":"Vocabulary","score":0.5965279340744019},{"id":"https://openalex.org/keywords/word-error-rate","display_name":"Word error rate","score":0.537183403968811},{"id":"https://openalex.org/keywords/word","display_name":"Word (group theory)","score":0.4825384020805359},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.4613533020019531},{"id":"https://openalex.org/keywords/audio-mining","display_name":"Audio mining","score":0.4161298871040344},{"id":"https://openalex.org/keywords/acoustic-model","display_name":"Acoustic model","score":0.4017389416694641},{"id":"https://openalex.org/keywords/speech-processing","display_name":"Speech processing","score":0.37881046533584595},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.3645504415035248}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8359239101409912},{"id":"https://openalex.org/C183003079","wikidata":"https://www.wikidata.org/wiki/Q1000371","display_name":"Personalization","level":2,"score":0.7492472529411316},{"id":"https://openalex.org/C118505674","wikidata":"https://www.wikidata.org/wiki/Q42586063","display_name":"Encoder","level":2,"score":0.7057939171791077},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.6861536502838135},{"id":"https://openalex.org/C137293760","wikidata":"https://www.wikidata.org/wiki/Q3621696","display_name":"Language model","level":2,"score":0.6570391654968262},{"id":"https://openalex.org/C2777601683","wikidata":"https://www.wikidata.org/wiki/Q6499736","display_name":"Vocabulary","level":2,"score":0.5965279340744019},{"id":"https://openalex.org/C40969351","wikidata":"https://www.wikidata.org/wiki/Q3516228","display_name":"Word error rate","level":2,"score":0.537183403968811},{"id":"https://openalex.org/C90805587","wikidata":"https://www.wikidata.org/wiki/Q10944557","display_name":"Word (group theory)","level":2,"score":0.4825384020805359},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4613533020019531},{"id":"https://openalex.org/C157968479","wikidata":"https://www.wikidata.org/wiki/Q3079876","display_name":"Audio mining","level":4,"score":0.4161298871040344},{"id":"https://openalex.org/C155635449","wikidata":"https://www.wikidata.org/wiki/Q4674699","display_name":"Acoustic model","level":3,"score":0.4017389416694641},{"id":"https://openalex.org/C61328038","wikidata":"https://www.wikidata.org/wiki/Q3358061","display_name":"Speech processing","level":2,"score":0.37881046533584595},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.3645504415035248},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C136764020","wikidata":"https://www.wikidata.org/wiki/Q466","display_name":"World Wide Web","level":1,"score":0.0},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/slt54892.2023.10022938","is_oa":false,"landing_page_url":"http://dx.doi.org/10.1109/slt54892.2023.10022938","pdf_url":null,"source":{"id":"https://openalex.org/S4363605953","display_name":"2022 IEEE Spoken Language Technology Workshop (SLT)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2022 IEEE Spoken Language Technology Workshop (SLT)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"display_name":"Quality Education","score":0.6499999761581421,"id":"https://metadata.un.org/sdg/4"}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":45,"referenced_works":["https://openalex.org/W1563460361","https://openalex.org/W1583837637","https://openalex.org/W1604697534","https://openalex.org/W1828163288","https://openalex.org/W2143612262","https://openalex.org/W2395440424","https://openalex.org/W2911544293","https://openalex.org/W2962760690","https://openalex.org/W2962826786","https://openalex.org/W2963240019","https://openalex.org/W2963362078","https://openalex.org/W2963414781","https://openalex.org/W2972625221","https://openalex.org/W2972880214","https://openalex.org/W2973051376","https://openalex.org/W3006752097","https://openalex.org/W3015194534","https://openalex.org/W3015654635","https://openalex.org/W3016234571","https://openalex.org/W3024464021","https://openalex.org/W3094667432","https://openalex.org/W3097714942","https://openalex.org/W3097777922","https://openalex.org/W3097890746","https://openalex.org/W3107298252","https://openalex.org/W3109311991","https://openalex.org/W3162639446","https://openalex.org/W3163462603","https://openalex.org/W3169722760","https://openalex.org/W3174905182","https://openalex.org/W3198442913","https://openalex.org/W3198769601","https://openalex.org/W3198836239","https://openalex.org/W3201341142","https://openalex.org/W3202725408","https://openalex.org/W3211278025","https://openalex.org/W4210690962","https://openalex.org/W4221151615","https://openalex.org/W4296069324","https://openalex.org/W4297841923","https://openalex.org/W6638749077","https://openalex.org/W6751104502","https://openalex.org/W6762242920","https://openalex.org/W6786752982","https://openalex.org/W6797067584"],"related_works":["https://openalex.org/W2759980945","https://openalex.org/W2740949665","https://openalex.org/W2786018489","https://openalex.org/W4206468315","https://openalex.org/W4309395198","https://openalex.org/W4223610296","https://openalex.org/W2153073500","https://openalex.org/W4319862668","https://openalex.org/W2033350476","https://openalex.org/W2952711665"],"abstract_inverted_index":{"End-to-end":[0],"(E2E)":[1],"speech-to-text":[2],"models":[3,80],"generally":[4],"require":[5],"transcribed":[6],"audio":[7,18],"for":[8,42,88],"training":[9],"and":[10,30,62,84],"personalization.":[11],"We":[12,53,91],"introduce":[13],"the":[14,26,46,57,96,116,132],"use":[15,97],"of":[16,59,98,129],"random":[17,102],"encoder":[19,103],"features,":[20],"rather":[21],"than":[22],"speech,":[23],"to":[24,95,105],"fine-tune":[25],"final":[27],"model":[28],"layers":[29],"acquire":[31],"new":[32,60],"vocabulary":[33,61],"from":[34],"text-only":[35],"data.":[36,52],"This":[37],"technique":[38],"can":[39],"be":[40,106],"used":[41],"on-device":[43],"personalization":[44],"before":[45],"user":[47,73],"has":[48],"provided":[49],"any":[50],"speech":[51],"show":[54,114],"improvements":[55],"in":[56],"recall":[58],"word":[63],"error":[64],"rate":[65],"(WER)":[66],"on":[67,75],"held-out":[68],"test":[69],"sets":[70],"using":[71,81],"simulated":[72],"experiments":[74],"hybrid":[76],"autoregressive":[77],"transducer":[78],"(HAT)":[79],"conformer-based":[82],"encoders":[83],"simple":[85],"text":[86],"embeddings":[87],"label":[89],"processing.":[90],"compare":[92],"this":[93],"approach":[94],"synthetic":[99],"audio,":[100],"finding":[101],"features":[104],"more":[107],"beneficial":[108],"with":[109],"lower":[110],"computational":[111],"cost.":[112],"Experiments":[113],"that":[115],"maximum":[117],"benefit":[118],"is":[119],"gained":[120],"by":[121],"updating":[122],"specific":[123],"network":[124],"components":[125],"comprising":[126],"a":[127],"subset":[128],"those":[130],"expressing":[131],"internal":[133],"language":[134],"model.":[135]},"counts_by_year":[{"year":2023,"cited_by_count":3}],"updated_date":"2025-12-24T23:09:58.560324","created_date":"2025-10-10T00:00:00"}
