{"id":"https://openalex.org/W4392903347","doi":"https://doi.org/10.1109/icassp48485.2024.10447571","title":"Personalization of CTC-Based End-to-End Speech Recognition Using Pronunciation-Driven Subword Tokenization","display_name":"Personalization of CTC-Based End-to-End Speech Recognition Using Pronunciation-Driven Subword Tokenization","publication_year":2024,"publication_date":"2024-03-18","ids":{"openalex":"https://openalex.org/W4392903347","doi":"https://doi.org/10.1109/icassp48485.2024.10447571"},"language":"en","primary_location":{"id":"doi:10.1109/icassp48485.2024.10447571","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp48485.2024.10447571","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2024 - 2024 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5040740925","display_name":"Zhihong Lei","orcid":"https://orcid.org/0000-0001-7846-3417"},"institutions":[{"id":"https://openalex.org/I4210107260","display_name":"Apple (United Kingdom)","ror":"https://ror.org/01vpeym60","country_code":"GB","type":"company","lineage":["https://openalex.org/I4210107260"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Zhihong Lei","raw_affiliation_strings":["Apple"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Apple","institution_ids":["https://openalex.org/I4210107260"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5048094198","display_name":"Ernest Pusateri","orcid":null},"institutions":[{"id":"https://openalex.org/I4210107260","display_name":"Apple (United Kingdom)","ror":"https://ror.org/01vpeym60","country_code":"GB","type":"company","lineage":["https://openalex.org/I4210107260"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Ernest Pusateri","raw_affiliation_strings":["Apple"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Apple","institution_ids":["https://openalex.org/I4210107260"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5054574376","display_name":"Shiyi Han","orcid":null},"institutions":[{"id":"https://openalex.org/I4210107260","display_name":"Apple (United Kingdom)","ror":"https://ror.org/01vpeym60","country_code":"GB","type":"company","lineage":["https://openalex.org/I4210107260"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Shiyi Han","raw_affiliation_strings":["Apple"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Apple","institution_ids":["https://openalex.org/I4210107260"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101842894","display_name":"Leo Liu","orcid":"https://orcid.org/0009-0008-1720-8533"},"institutions":[{"id":"https://openalex.org/I4210107260","display_name":"Apple (United Kingdom)","ror":"https://ror.org/01vpeym60","country_code":"GB","type":"company","lineage":["https://openalex.org/I4210107260"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Leo Liu","raw_affiliation_strings":["Apple"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Apple","institution_ids":["https://openalex.org/I4210107260"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5086280874","display_name":"Mingbin Xu","orcid":null},"institutions":[{"id":"https://openalex.org/I4210107260","display_name":"Apple (United Kingdom)","ror":"https://ror.org/01vpeym60","country_code":"GB","type":"company","lineage":["https://openalex.org/I4210107260"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Mingbin Xu","raw_affiliation_strings":["Apple"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Apple","institution_ids":["https://openalex.org/I4210107260"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5110640042","display_name":"Tim Ng","orcid":null},"institutions":[{"id":"https://openalex.org/I4210107260","display_name":"Apple (United Kingdom)","ror":"https://ror.org/01vpeym60","country_code":"GB","type":"company","lineage":["https://openalex.org/I4210107260"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Tim Ng","raw_affiliation_strings":["Apple"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Apple","institution_ids":["https://openalex.org/I4210107260"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5079853614","display_name":"Ruchir Travadi","orcid":"https://orcid.org/0000-0002-1272-5282"},"institutions":[{"id":"https://openalex.org/I4210107260","display_name":"Apple (United Kingdom)","ror":"https://ror.org/01vpeym60","country_code":"GB","type":"company","lineage":["https://openalex.org/I4210107260"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Ruchir Travadi","raw_affiliation_strings":["Apple"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Apple","institution_ids":["https://openalex.org/I4210107260"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5049670343","display_name":"Youyuan Zhang","orcid":"https://orcid.org/0000-0001-8572-4910"},"institutions":[{"id":"https://openalex.org/I4210107260","display_name":"Apple (United Kingdom)","ror":"https://ror.org/01vpeym60","country_code":"GB","type":"company","lineage":["https://openalex.org/I4210107260"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Youyuan Zhang","raw_affiliation_strings":["Apple"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Apple","institution_ids":["https://openalex.org/I4210107260"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5110926453","display_name":"Mirko Hannemann","orcid":null},"institutions":[{"id":"https://openalex.org/I4210107260","display_name":"Apple (United Kingdom)","ror":"https://ror.org/01vpeym60","country_code":"GB","type":"company","lineage":["https://openalex.org/I4210107260"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Mirko Hannemann","raw_affiliation_strings":["Apple"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Apple","institution_ids":["https://openalex.org/I4210107260"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5067041166","display_name":"Man-Hung Siu","orcid":null},"institutions":[{"id":"https://openalex.org/I4210107260","display_name":"Apple (United Kingdom)","ror":"https://ror.org/01vpeym60","country_code":"GB","type":"company","lineage":["https://openalex.org/I4210107260"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Man-Hung Siu","raw_affiliation_strings":["Apple"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Apple","institution_ids":["https://openalex.org/I4210107260"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5011008665","display_name":"Zhen Huang","orcid":"https://orcid.org/0000-0003-4819-373X"},"institutions":[{"id":"https://openalex.org/I4210107260","display_name":"Apple (United Kingdom)","ror":"https://ror.org/01vpeym60","country_code":"GB","type":"company","lineage":["https://openalex.org/I4210107260"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Zhen Huang","raw_affiliation_strings":["Apple"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Apple","institution_ids":["https://openalex.org/I4210107260"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":1,"corresponding_author_ids":[],"corresponding_institution_ids":["https://openalex.org/I4210107260"],"apc_list":null,"apc_paid":null,"fwci":1.7915,"has_fulltext":false,"cited_by_count":7,"citation_normalized_percentile":{"value":0.86467097,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":98},"biblio":{"volume":null,"issue":null,"first_page":"10096","last_page":"10100"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9995999932289124,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8509833812713623},{"id":"https://openalex.org/keywords/pronunciation","display_name":"Pronunciation","score":0.7421985864639282},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.708655595779419},{"id":"https://openalex.org/keywords/personalization","display_name":"Personalization","score":0.649499773979187},{"id":"https://openalex.org/keywords/end-to-end-principle","display_name":"End-to-end principle","score":0.6253860592842102},{"id":"https://openalex.org/keywords/normalization","display_name":"Normalization (sociology)","score":0.5935381650924683},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.523007333278656},{"id":"https://openalex.org/keywords/classifier","display_name":"Classifier (UML)","score":0.49293947219848633},{"id":"https://openalex.org/keywords/lexical-analysis","display_name":"Lexical analysis","score":0.4557762145996094},{"id":"https://openalex.org/keywords/hidden-markov-model","display_name":"Hidden Markov model","score":0.45564988255500793},{"id":"https://openalex.org/keywords/keyword-spotting","display_name":"Keyword spotting","score":0.45012593269348145},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.4237551689147949},{"id":"https://openalex.org/keywords/world-wide-web","display_name":"World Wide Web","score":0.08077237010002136}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8509833812713623},{"id":"https://openalex.org/C2780844864","wikidata":"https://www.wikidata.org/wiki/Q184377","display_name":"Pronunciation","level":2,"score":0.7421985864639282},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.708655595779419},{"id":"https://openalex.org/C183003079","wikidata":"https://www.wikidata.org/wiki/Q1000371","display_name":"Personalization","level":2,"score":0.649499773979187},{"id":"https://openalex.org/C74296488","wikidata":"https://www.wikidata.org/wiki/Q2527392","display_name":"End-to-end principle","level":2,"score":0.6253860592842102},{"id":"https://openalex.org/C136886441","wikidata":"https://www.wikidata.org/wiki/Q926129","display_name":"Normalization (sociology)","level":2,"score":0.5935381650924683},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.523007333278656},{"id":"https://openalex.org/C95623464","wikidata":"https://www.wikidata.org/wiki/Q1096149","display_name":"Classifier (UML)","level":2,"score":0.49293947219848633},{"id":"https://openalex.org/C176982825","wikidata":"https://www.wikidata.org/wiki/Q835922","display_name":"Lexical analysis","level":2,"score":0.4557762145996094},{"id":"https://openalex.org/C23224414","wikidata":"https://www.wikidata.org/wiki/Q176769","display_name":"Hidden Markov model","level":2,"score":0.45564988255500793},{"id":"https://openalex.org/C2781213101","wikidata":"https://www.wikidata.org/wiki/Q6398558","display_name":"Keyword spotting","level":2,"score":0.45012593269348145},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.4237551689147949},{"id":"https://openalex.org/C136764020","wikidata":"https://www.wikidata.org/wiki/Q466","display_name":"World Wide Web","level":1,"score":0.08077237010002136},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C144024400","wikidata":"https://www.wikidata.org/wiki/Q21201","display_name":"Sociology","level":0,"score":0.0},{"id":"https://openalex.org/C19165224","wikidata":"https://www.wikidata.org/wiki/Q23404","display_name":"Anthropology","level":1,"score":0.0},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/icassp48485.2024.10447571","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp48485.2024.10447571","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2024 - 2024 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":24,"referenced_works":["https://openalex.org/W1604697534","https://openalex.org/W2127141656","https://openalex.org/W2137446469","https://openalex.org/W2143612262","https://openalex.org/W2262393948","https://openalex.org/W2327501763","https://openalex.org/W2886319145","https://openalex.org/W2963211739","https://openalex.org/W2963250244","https://openalex.org/W2972625221","https://openalex.org/W3008037978","https://openalex.org/W3015480556","https://openalex.org/W3015486229","https://openalex.org/W3097239815","https://openalex.org/W3097777922","https://openalex.org/W3152221657","https://openalex.org/W3197478142","https://openalex.org/W3198442913","https://openalex.org/W4323066695","https://openalex.org/W4378105483","https://openalex.org/W4391021736","https://openalex.org/W6638218882","https://openalex.org/W6850218400","https://openalex.org/W6852909395"],"related_works":["https://openalex.org/W2114097550","https://openalex.org/W4385352507","https://openalex.org/W2918559346","https://openalex.org/W84309476","https://openalex.org/W4286904253","https://openalex.org/W2183593636","https://openalex.org/W2350724007","https://openalex.org/W2355751417","https://openalex.org/W2388033618","https://openalex.org/W2019287799"],"abstract_inverted_index":{"Recent":[0],"advances":[1],"in":[2,75],"deep":[3],"learning":[4],"and":[5,83],"automatic":[6],"speech":[7,15,41],"recognition":[8,16,19,42],"have":[9],"improved":[10],"the":[11],"accuracy":[12,95],"of":[13,20],"end-to-end":[14,40],"systems,":[17],"but":[18],"personal":[21,64,92],"content":[22],"such":[23],"as":[24],"contact":[25],"names":[26],"remains":[27],"a":[28,55,99],"challenge.":[29],"In":[30],"this":[31,73],"work,":[32,52],"we":[33,53,87],"describe":[34],"our":[35],"personalization":[36],"solution":[37],"for":[38,58,63],"an":[39],"system":[43],"based":[44],"on":[45,50,96],"connectionist":[46],"temporal":[47],"classification.":[48],"Building":[49],"previous":[51],"present":[54],"novel":[56],"method":[57],"generating":[59],"additional":[60],"subword":[61],"tokenizations":[62],"entities":[65],"from":[66],"their":[67],"pronunciations.":[68],"We":[69],"show":[70],"that":[71],"using":[72],"technique":[74],"combination":[76],"with":[77,98],"two":[78],"established":[79],"techniques,":[80],"contextual":[81],"biasing":[82],"wordpiece":[84],"prior":[85],"normalization,":[86],"are":[88],"able":[89],"to":[90],"achieve":[91],"named":[93],"entity":[94],"par":[97],"competitive":[100],"hybrid":[101],"system.":[102]},"counts_by_year":[{"year":2025,"cited_by_count":5},{"year":2024,"cited_by_count":1},{"year":2023,"cited_by_count":1}],"updated_date":"2026-06-26T08:34:08.712188","created_date":"2025-10-10T00:00:00"}
