{"id":"https://openalex.org/W3202419788","doi":"https://doi.org/10.1109/icassp43922.2022.9746908","title":"Factorized Neural Transducer for Efficient Language Model Adaptation","display_name":"Factorized Neural Transducer for Efficient Language Model Adaptation","publication_year":2022,"publication_date":"2022-04-27","ids":{"openalex":"https://openalex.org/W3202419788","doi":"https://doi.org/10.1109/icassp43922.2022.9746908","mag":"3202419788"},"language":"en","primary_location":{"id":"doi:10.1109/icassp43922.2022.9746908","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp43922.2022.9746908","pdf_url":null,"source":{"id":"https://openalex.org/S4363607702","display_name":"ICASSP 2022 - 2022 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2022 - 2022 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5100329117","display_name":"Xie Chen","orcid":"https://orcid.org/0000-0001-5801-2571"},"institutions":[{"id":"https://openalex.org/I1290206253","display_name":"Microsoft (United States)","ror":"https://ror.org/00d0nc645","country_code":"US","type":"company","lineage":["https://openalex.org/I1290206253"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Xie Chen","raw_affiliation_strings":["Microsoft Speech and Language Group"],"affiliations":[{"raw_affiliation_string":"Microsoft Speech and Language Group","institution_ids":["https://openalex.org/I1290206253"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101749753","display_name":"Zhong Meng","orcid":"https://orcid.org/0000-0001-7814-5929"},"institutions":[{"id":"https://openalex.org/I1290206253","display_name":"Microsoft (United States)","ror":"https://ror.org/00d0nc645","country_code":"US","type":"company","lineage":["https://openalex.org/I1290206253"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Zhong Meng","raw_affiliation_strings":["Microsoft Speech and Language Group"],"affiliations":[{"raw_affiliation_string":"Microsoft Speech and Language Group","institution_ids":["https://openalex.org/I1290206253"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5102376243","display_name":"Sarangarajan Parthasarathy","orcid":null},"institutions":[{"id":"https://openalex.org/I1290206253","display_name":"Microsoft (United States)","ror":"https://ror.org/00d0nc645","country_code":"US","type":"company","lineage":["https://openalex.org/I1290206253"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Sarangarajan Parthasarathy","raw_affiliation_strings":["Microsoft Speech and Language Group"],"affiliations":[{"raw_affiliation_string":"Microsoft Speech and Language Group","institution_ids":["https://openalex.org/I1290206253"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5100365053","display_name":"Jinyu Li","orcid":"https://orcid.org/0000-0002-1089-9748"},"institutions":[{"id":"https://openalex.org/I1290206253","display_name":"Microsoft (United States)","ror":"https://ror.org/00d0nc645","country_code":"US","type":"company","lineage":["https://openalex.org/I1290206253"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Jinyu Li","raw_affiliation_strings":["Microsoft Speech and Language Group"],"affiliations":[{"raw_affiliation_string":"Microsoft Speech and Language Group","institution_ids":["https://openalex.org/I1290206253"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5100329117"],"corresponding_institution_ids":["https://openalex.org/I1290206253"],"apc_list":null,"apc_paid":null,"fwci":2.8182,"has_fulltext":false,"cited_by_count":27,"citation_normalized_percentile":{"value":0.92150315,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":90,"max":99},"biblio":{"volume":null,"issue":null,"first_page":"8132","last_page":"8136"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9968000054359436,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9962999820709229,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8350085020065308},{"id":"https://openalex.org/keywords/language-model","display_name":"Language model","score":0.7206908464431763},{"id":"https://openalex.org/keywords/vocabulary","display_name":"Vocabulary","score":0.6666288375854492},{"id":"https://openalex.org/keywords/transducer","display_name":"Transducer","score":0.5752492547035217},{"id":"https://openalex.org/keywords/adaptation","display_name":"Adaptation (eye)","score":0.5737854242324829},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.5580920577049255},{"id":"https://openalex.org/keywords/acoustic-model","display_name":"Acoustic model","score":0.47109636664390564},{"id":"https://openalex.org/keywords/set","display_name":"Set (abstract data type)","score":0.4582393169403076},{"id":"https://openalex.org/keywords/artificial-neural-network","display_name":"Artificial neural network","score":0.45791664719581604},{"id":"https://openalex.org/keywords/lexicon","display_name":"Lexicon","score":0.44561341404914856},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.4277634024620056},{"id":"https://openalex.org/keywords/cache-language-model","display_name":"Cache language model","score":0.414877325296402},{"id":"https://openalex.org/keywords/speech-processing","display_name":"Speech processing","score":0.22811082005500793},{"id":"https://openalex.org/keywords/natural-language","display_name":"Natural language","score":0.12060418725013733},{"id":"https://openalex.org/keywords/engineering","display_name":"Engineering","score":0.11573690176010132}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8350085020065308},{"id":"https://openalex.org/C137293760","wikidata":"https://www.wikidata.org/wiki/Q3621696","display_name":"Language model","level":2,"score":0.7206908464431763},{"id":"https://openalex.org/C2777601683","wikidata":"https://www.wikidata.org/wiki/Q6499736","display_name":"Vocabulary","level":2,"score":0.6666288375854492},{"id":"https://openalex.org/C56318395","wikidata":"https://www.wikidata.org/wiki/Q215928","display_name":"Transducer","level":2,"score":0.5752492547035217},{"id":"https://openalex.org/C139807058","wikidata":"https://www.wikidata.org/wiki/Q352374","display_name":"Adaptation (eye)","level":2,"score":0.5737854242324829},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.5580920577049255},{"id":"https://openalex.org/C155635449","wikidata":"https://www.wikidata.org/wiki/Q4674699","display_name":"Acoustic model","level":3,"score":0.47109636664390564},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.4582393169403076},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.45791664719581604},{"id":"https://openalex.org/C2778121359","wikidata":"https://www.wikidata.org/wiki/Q8096","display_name":"Lexicon","level":2,"score":0.44561341404914856},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4277634024620056},{"id":"https://openalex.org/C39608478","wikidata":"https://www.wikidata.org/wiki/Q5015979","display_name":"Cache language model","level":5,"score":0.414877325296402},{"id":"https://openalex.org/C61328038","wikidata":"https://www.wikidata.org/wiki/Q3358061","display_name":"Speech processing","level":2,"score":0.22811082005500793},{"id":"https://openalex.org/C195324797","wikidata":"https://www.wikidata.org/wiki/Q33742","display_name":"Natural language","level":2,"score":0.12060418725013733},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.11573690176010132},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C83479923","wikidata":"https://www.wikidata.org/wiki/Q2063748","display_name":"Universal Networking Language","level":4,"score":0.0},{"id":"https://openalex.org/C129353971","wikidata":"https://www.wikidata.org/wiki/Q5156949","display_name":"Comprehension approach","level":3,"score":0.0},{"id":"https://openalex.org/C120665830","wikidata":"https://www.wikidata.org/wiki/Q14620","display_name":"Optics","level":1,"score":0.0},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.0},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0},{"id":"https://openalex.org/C119599485","wikidata":"https://www.wikidata.org/wiki/Q43035","display_name":"Electrical engineering","level":1,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/icassp43922.2022.9746908","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp43922.2022.9746908","pdf_url":null,"source":{"id":"https://openalex.org/S4363607702","display_name":"ICASSP 2022 - 2022 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2022 - 2022 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"display_name":"Quality Education","score":0.75,"id":"https://metadata.un.org/sdg/4"}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":50,"referenced_works":["https://openalex.org/W854541894","https://openalex.org/W1828163288","https://openalex.org/W2020073413","https://openalex.org/W2133564696","https://openalex.org/W2327501763","https://openalex.org/W2399550240","https://openalex.org/W2750499125","https://openalex.org/W2889152503","https://openalex.org/W2936123380","https://openalex.org/W2962760690","https://openalex.org/W2962824709","https://openalex.org/W2963240019","https://openalex.org/W2963414781","https://openalex.org/W2963827914","https://openalex.org/W2963970535","https://openalex.org/W2964272710","https://openalex.org/W2964308564","https://openalex.org/W2987019345","https://openalex.org/W3006752097","https://openalex.org/W3007227084","https://openalex.org/W3008037978","https://openalex.org/W3012775809","https://openalex.org/W3015194534","https://openalex.org/W3015686596","https://openalex.org/W3016010032","https://openalex.org/W3016234571","https://openalex.org/W3094667432","https://openalex.org/W3152221657","https://openalex.org/W3153201663","https://openalex.org/W3161873870","https://openalex.org/W3162244132","https://openalex.org/W3162665866","https://openalex.org/W3163560333","https://openalex.org/W3184976814","https://openalex.org/W3193461931","https://openalex.org/W3197661863","https://openalex.org/W3205788551","https://openalex.org/W4288088457","https://openalex.org/W4294619417","https://openalex.org/W6623517193","https://openalex.org/W6638749077","https://openalex.org/W6679434410","https://openalex.org/W6741807409","https://openalex.org/W6747158283","https://openalex.org/W6747398299","https://openalex.org/W6769806307","https://openalex.org/W6770245836","https://openalex.org/W6775172608","https://openalex.org/W6784916438","https://openalex.org/W6802564859"],"related_works":["https://openalex.org/W4255155614","https://openalex.org/W2394860946","https://openalex.org/W761725120","https://openalex.org/W2125971872","https://openalex.org/W3021690593","https://openalex.org/W2888189389","https://openalex.org/W2161188302","https://openalex.org/W4200200210","https://openalex.org/W2402899696","https://openalex.org/W3089901025"],"abstract_inverted_index":{"In":[0,83],"recent":[1],"years,":[2],"end-to-end":[3],"(E2E)":[4],"based":[5,24,32],"automatic":[6],"speech":[7,133],"recognition":[8],"(ASR)":[9],"systems":[10,34],"have":[11,36],"achieved":[12],"great":[13],"success":[14],"due":[15],"to":[16,39,85,129,142,155],"their":[17,78],"simplicity":[18],"and":[19,35,56,101,104],"promising":[20],"performance.":[21],"Neural":[22],"Transducer":[23,62,131,152],"models":[25],"are":[26],"increasingly":[27],"popular":[28],"in":[29,45,60,67,81,176],"streaming":[30],"E2E":[31],"ASR":[33,69],"been":[37],"reported":[38],"outperform":[40],"the":[41,49,99,111,122,125,130,148,170],"traditional":[42],"hybrid":[43],"system":[44],"some":[46],"scenarios.":[47],"However,":[48],"joint":[50],"optimization":[51],"of":[52,124,172],"acoustic":[53],"model,":[54,93],"lexicon":[55],"language":[57,108,127,138,166],"model":[58,109,128,139,167],"(LM)":[59],"neural":[61,95,151],"also":[63],"brings":[64],"about":[65],"challenges":[66],"adapting":[68],"using":[70],"just":[71],"adaptation":[72,140],"text.":[73],"This":[74],"draw-back":[75],"might":[76],"prevent":[77],"potential":[79],"applications":[80],"practice.":[82],"order":[84],"address":[86],"this":[87,118],"issue,":[88],"we":[89],"propose":[90],"a":[91,106,173,179],"novel":[92],"factorized":[94,150],"Transducer,":[96],"by":[97],"factorizing":[98],"blank":[100],"vocabulary":[102,112],"prediction,":[103],"adopting":[105],"standalone":[107,126],"for":[110,132,165],"prediction.":[113],"It":[114],"is":[115,163],"expected":[116],"that":[117,147],"factorization":[119],"can":[120],"transfer":[121],"improvement":[123],"recognition,":[134],"which":[135],"allows":[136],"various":[137],"techniques":[141],"be":[143],"applied.":[144],"We":[145],"demonstrate":[146],"proposed":[149],"yields":[153],"15.4%":[154],"19.4%":[156],"WER":[157,177],"improvements":[158],"when":[159],"out-of-domain":[160],"text":[161],"data":[162],"used":[164],"adaptation,":[168],"at":[169],"cost":[171],"minor":[174],"degradation":[175],"on":[178],"general":[180],"test":[181],"set.":[182]},"counts_by_year":[{"year":2025,"cited_by_count":6},{"year":2024,"cited_by_count":9},{"year":2023,"cited_by_count":11},{"year":2022,"cited_by_count":1}],"updated_date":"2026-04-03T22:45:19.894376","created_date":"2025-10-10T00:00:00"}
