{"id":"https://openalex.org/W4319862261","doi":"https://doi.org/10.1109/slt54892.2023.10022705","title":"Personalization of CTC Speech Recognition Models","display_name":"Personalization of CTC Speech Recognition Models","publication_year":2023,"publication_date":"2023-01-09","ids":{"openalex":"https://openalex.org/W4319862261","doi":"https://doi.org/10.1109/slt54892.2023.10022705"},"language":"en","primary_location":{"id":"doi:10.1109/slt54892.2023.10022705","is_oa":false,"landing_page_url":"https://doi.org/10.1109/slt54892.2023.10022705","pdf_url":null,"source":{"id":"https://openalex.org/S4363605953","display_name":"2022 IEEE Spoken Language Technology Workshop (SLT)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2022 IEEE Spoken Language Technology Workshop (SLT)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5027419911","display_name":"Saket Dingliwal","orcid":null},"institutions":[{"id":"https://openalex.org/I1311688040","display_name":"Amazon (United States)","ror":"https://ror.org/04mv4n011","country_code":"US","type":"company","lineage":["https://openalex.org/I1311688040"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Saket Dingliwal","raw_affiliation_strings":["Amazon AWS AI"],"affiliations":[{"raw_affiliation_string":"Amazon AWS AI","institution_ids":["https://openalex.org/I1311688040"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5014130267","display_name":"Monica Sunkara","orcid":null},"institutions":[{"id":"https://openalex.org/I1311688040","display_name":"Amazon (United States)","ror":"https://ror.org/04mv4n011","country_code":"US","type":"company","lineage":["https://openalex.org/I1311688040"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Monica Sunkara","raw_affiliation_strings":["Amazon AWS AI"],"affiliations":[{"raw_affiliation_string":"Amazon AWS AI","institution_ids":["https://openalex.org/I1311688040"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5004616142","display_name":"Srikanth Ronanki","orcid":null},"institutions":[{"id":"https://openalex.org/I1311688040","display_name":"Amazon (United States)","ror":"https://ror.org/04mv4n011","country_code":"US","type":"company","lineage":["https://openalex.org/I1311688040"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Srikanth Ronanki","raw_affiliation_strings":["Amazon AWS AI"],"affiliations":[{"raw_affiliation_string":"Amazon AWS AI","institution_ids":["https://openalex.org/I1311688040"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5027803608","display_name":"Jeff Farris","orcid":null},"institutions":[{"id":"https://openalex.org/I1311688040","display_name":"Amazon (United States)","ror":"https://ror.org/04mv4n011","country_code":"US","type":"company","lineage":["https://openalex.org/I1311688040"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Jeff Farris","raw_affiliation_strings":["Amazon AWS AI"],"affiliations":[{"raw_affiliation_string":"Amazon AWS AI","institution_ids":["https://openalex.org/I1311688040"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5050325468","display_name":"Katrin Kirchhoff","orcid":"https://orcid.org/0000-0002-6645-6030"},"institutions":[{"id":"https://openalex.org/I1311688040","display_name":"Amazon (United States)","ror":"https://ror.org/04mv4n011","country_code":"US","type":"company","lineage":["https://openalex.org/I1311688040"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Katrin Kirchhoff","raw_affiliation_strings":["Amazon AWS AI"],"affiliations":[{"raw_affiliation_string":"Amazon AWS AI","institution_ids":["https://openalex.org/I1311688040"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5050552820","display_name":"Sravan Bodapati","orcid":null},"institutions":[{"id":"https://openalex.org/I1311688040","display_name":"Amazon (United States)","ror":"https://ror.org/04mv4n011","country_code":"US","type":"company","lineage":["https://openalex.org/I1311688040"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Sravan Bodapati","raw_affiliation_strings":["Amazon AWS AI"],"affiliations":[{"raw_affiliation_string":"Amazon AWS AI","institution_ids":["https://openalex.org/I1311688040"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5027419911"],"corresponding_institution_ids":["https://openalex.org/I1311688040"],"apc_list":null,"apc_paid":null,"fwci":2.1476,"has_fulltext":false,"cited_by_count":19,"citation_normalized_percentile":{"value":0.89016137,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":96,"max":99},"biblio":{"volume":null,"issue":null,"first_page":"302","last_page":"309"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9986000061035156,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9962000250816345,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8418808579444885},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.6123203039169312},{"id":"https://openalex.org/keywords/inference","display_name":"Inference","score":0.5826598405838013},{"id":"https://openalex.org/keywords/boosting","display_name":"Boosting (machine learning)","score":0.5607845187187195},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5193106532096863},{"id":"https://openalex.org/keywords/vocabulary","display_name":"Vocabulary","score":0.4817892909049988},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.4070679843425751}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8418808579444885},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.6123203039169312},{"id":"https://openalex.org/C2776214188","wikidata":"https://www.wikidata.org/wiki/Q408386","display_name":"Inference","level":2,"score":0.5826598405838013},{"id":"https://openalex.org/C46686674","wikidata":"https://www.wikidata.org/wiki/Q466303","display_name":"Boosting (machine learning)","level":2,"score":0.5607845187187195},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5193106532096863},{"id":"https://openalex.org/C2777601683","wikidata":"https://www.wikidata.org/wiki/Q6499736","display_name":"Vocabulary","level":2,"score":0.4817892909049988},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.4070679843425751},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/slt54892.2023.10022705","is_oa":false,"landing_page_url":"https://doi.org/10.1109/slt54892.2023.10022705","pdf_url":null,"source":{"id":"https://openalex.org/S4363605953","display_name":"2022 IEEE Spoken Language Technology Workshop (SLT)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2022 IEEE Spoken Language Technology Workshop (SLT)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"score":0.5,"id":"https://metadata.un.org/sdg/4","display_name":"Quality Education"}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":38,"referenced_works":["https://openalex.org/W1828163288","https://openalex.org/W2143612262","https://openalex.org/W2293829681","https://openalex.org/W2327501763","https://openalex.org/W2477293991","https://openalex.org/W2526425061","https://openalex.org/W2766219058","https://openalex.org/W2886319145","https://openalex.org/W2911291251","https://openalex.org/W2937402758","https://openalex.org/W2962760690","https://openalex.org/W2962780374","https://openalex.org/W2964107261","https://openalex.org/W3015486229","https://openalex.org/W3094667432","https://openalex.org/W3095311338","https://openalex.org/W3097625183","https://openalex.org/W3097777922","https://openalex.org/W3097794466","https://openalex.org/W3097973766","https://openalex.org/W3119308075","https://openalex.org/W3140235797","https://openalex.org/W3163462603","https://openalex.org/W3163793923","https://openalex.org/W3168770049","https://openalex.org/W3197478142","https://openalex.org/W3197688480","https://openalex.org/W3211278025","https://openalex.org/W4210811812","https://openalex.org/W4224918838","https://openalex.org/W4224918897","https://openalex.org/W4296068409","https://openalex.org/W4296068796","https://openalex.org/W4297818305","https://openalex.org/W4385245566","https://openalex.org/W6638749077","https://openalex.org/W6747270024","https://openalex.org/W6777970034"],"related_works":["https://openalex.org/W2125652721","https://openalex.org/W1540371141","https://openalex.org/W1549363203","https://openalex.org/W4231274751","https://openalex.org/W2154063878","https://openalex.org/W2556012038","https://openalex.org/W1489772951","https://openalex.org/W1518215897","https://openalex.org/W3082059448","https://openalex.org/W4313640622"],"abstract_inverted_index":{"End-to-end":[0],"speech":[1],"recognition":[2],"models":[3,37],"trained":[4],"using":[5],"joint":[6],"Connectionist":[7],"Temporal":[8],"Classification":[9],"(CTC)-Attention":[10],"loss":[11],"have":[12],"gained":[13],"popularity":[14],"recently.":[15],"In":[16],"these":[17],"models,":[18],"a":[19,65,77,117,128],"non-autoregressive":[20],"CTC":[21,130],"decoder":[22],"is":[23],"often":[24],"used":[25],"at":[26],"inference":[27],"time":[28,54],"due":[29],"to":[30,40,56,98,115],"its":[31],"speed":[32],"and":[33,83,87,92,111],"simplicity.":[34],"However,":[35],"such":[36],"are":[38],"hard":[39],"personalize":[41],"because":[42],"of":[43,80],"their":[44],"conditional":[45],"independence":[46],"assumption":[47],"that":[48,69],"prevents":[49],"output":[50],"tokens":[51],"from":[52],"previous":[53],"steps":[55],"influence":[57],"future":[58],"predictions.":[59],"To":[60],"tackle":[61],"this,":[62],"we":[63],"propose":[64],"novel":[66],"two-way":[67],"approach":[68,107],"first":[70],"biases":[71],"the":[72,101],"encoder":[73],"with":[74],"attention":[75],"over":[76,127],"predefined":[78],"list":[79],"rare":[81,125],"long-tail":[82],"out-of-vocabulary":[84],"(OOV)":[85],"words":[86,126],"then":[88],"uses":[89],"dynamic":[90],"boosting":[91],"phone":[93],"alignment":[94],"network":[95],"during":[96],"decoding":[97],"further":[99],"bias":[100],"subword":[102],"pre-dictions.":[103],"We":[104],"evaluate":[105],"our":[106],"on":[108,123],"open-source":[109],"VoxPopuli":[110],"in-house":[112],"medical":[113],"datasets":[114],"showcase":[116],"60%":[118],"improvement":[119],"in":[120],"F1":[121],"score":[122],"domain-specific":[124],"strong":[129],"baseline.":[131]},"counts_by_year":[{"year":2025,"cited_by_count":6},{"year":2024,"cited_by_count":3},{"year":2023,"cited_by_count":10}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
