{"id":"https://openalex.org/W4319862420","doi":"https://doi.org/10.1109/slt54892.2023.10022543","title":"Streaming, Fast and Accurate on-Device Inverse Text Normalization for Automatic Speech Recognition","display_name":"Streaming, Fast and Accurate on-Device Inverse Text Normalization for Automatic Speech Recognition","publication_year":2023,"publication_date":"2023-01-09","ids":{"openalex":"https://openalex.org/W4319862420","doi":"https://doi.org/10.1109/slt54892.2023.10022543"},"language":"en","primary_location":{"id":"doi:10.1109/slt54892.2023.10022543","is_oa":false,"landing_page_url":"http://dx.doi.org/10.1109/slt54892.2023.10022543","pdf_url":null,"source":{"id":"https://openalex.org/S4363605953","display_name":"2022 IEEE Spoken Language Technology Workshop (SLT)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2022 IEEE Spoken Language Technology Workshop (SLT)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5034136587","display_name":"Yashesh Gaur","orcid":null},"institutions":[{"id":"https://openalex.org/I4210105678","display_name":"Microsoft (Finland)","ror":"https://ror.org/01nehjf29","country_code":"FI","type":"company","lineage":["https://openalex.org/I1290206253","https://openalex.org/I4210105678"]},{"id":"https://openalex.org/I1290206253","display_name":"Microsoft (United States)","ror":"https://ror.org/00d0nc645","country_code":"US","type":"company","lineage":["https://openalex.org/I1290206253"]}],"countries":["FI","US"],"is_corresponding":true,"raw_author_name":"Yashesh Gaur","raw_affiliation_strings":["Microsoft Corp.,U.S.A","Microsoft Corp., U.S.A"],"affiliations":[{"raw_affiliation_string":"Microsoft Corp.,U.S.A","institution_ids":["https://openalex.org/I1290206253","https://openalex.org/I4210105678"]},{"raw_affiliation_string":"Microsoft Corp., U.S.A","institution_ids":["https://openalex.org/I1290206253"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5033810208","display_name":"Nick Kibre","orcid":null},"institutions":[{"id":"https://openalex.org/I1290206253","display_name":"Microsoft (United States)","ror":"https://ror.org/00d0nc645","country_code":"US","type":"company","lineage":["https://openalex.org/I1290206253"]},{"id":"https://openalex.org/I4210105678","display_name":"Microsoft (Finland)","ror":"https://ror.org/01nehjf29","country_code":"FI","type":"company","lineage":["https://openalex.org/I1290206253","https://openalex.org/I4210105678"]}],"countries":["FI","US"],"is_corresponding":false,"raw_author_name":"Nick Kibre","raw_affiliation_strings":["Microsoft Corp.,U.S.A","Microsoft Corp., U.S.A"],"affiliations":[{"raw_affiliation_string":"Microsoft Corp.,U.S.A","institution_ids":["https://openalex.org/I1290206253","https://openalex.org/I4210105678"]},{"raw_affiliation_string":"Microsoft Corp., U.S.A","institution_ids":["https://openalex.org/I1290206253"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100763213","display_name":"Jian Xue","orcid":"https://orcid.org/0000-0002-9460-802X"},"institutions":[{"id":"https://openalex.org/I1290206253","display_name":"Microsoft (United States)","ror":"https://ror.org/00d0nc645","country_code":"US","type":"company","lineage":["https://openalex.org/I1290206253"]},{"id":"https://openalex.org/I4210105678","display_name":"Microsoft (Finland)","ror":"https://ror.org/01nehjf29","country_code":"FI","type":"company","lineage":["https://openalex.org/I1290206253","https://openalex.org/I4210105678"]}],"countries":["FI","US"],"is_corresponding":false,"raw_author_name":"Jian Xue","raw_affiliation_strings":["Microsoft Corp.,U.S.A","Microsoft Corp., U.S.A"],"affiliations":[{"raw_affiliation_string":"Microsoft Corp.,U.S.A","institution_ids":["https://openalex.org/I1290206253","https://openalex.org/I4210105678"]},{"raw_affiliation_string":"Microsoft Corp., U.S.A","institution_ids":["https://openalex.org/I1290206253"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5062043763","display_name":"Kangyuan Shu","orcid":null},"institutions":[{"id":"https://openalex.org/I4210105678","display_name":"Microsoft (Finland)","ror":"https://ror.org/01nehjf29","country_code":"FI","type":"company","lineage":["https://openalex.org/I1290206253","https://openalex.org/I4210105678"]},{"id":"https://openalex.org/I1290206253","display_name":"Microsoft (United States)","ror":"https://ror.org/00d0nc645","country_code":"US","type":"company","lineage":["https://openalex.org/I1290206253"]}],"countries":["FI","US"],"is_corresponding":false,"raw_author_name":"Kangyuan Shu","raw_affiliation_strings":["Microsoft Corp.,U.S.A","Microsoft Corp., U.S.A"],"affiliations":[{"raw_affiliation_string":"Microsoft Corp.,U.S.A","institution_ids":["https://openalex.org/I1290206253","https://openalex.org/I4210105678"]},{"raw_affiliation_string":"Microsoft Corp., U.S.A","institution_ids":["https://openalex.org/I1290206253"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100330357","display_name":"Yuhui Wang","orcid":"https://orcid.org/0000-0002-0502-7486"},"institutions":[{"id":"https://openalex.org/I4210105678","display_name":"Microsoft (Finland)","ror":"https://ror.org/01nehjf29","country_code":"FI","type":"company","lineage":["https://openalex.org/I1290206253","https://openalex.org/I4210105678"]},{"id":"https://openalex.org/I1290206253","display_name":"Microsoft (United States)","ror":"https://ror.org/00d0nc645","country_code":"US","type":"company","lineage":["https://openalex.org/I1290206253"]}],"countries":["FI","US"],"is_corresponding":false,"raw_author_name":"Yuhui Wang","raw_affiliation_strings":["Microsoft Corp.,U.S.A","Microsoft Corp., U.S.A"],"affiliations":[{"raw_affiliation_string":"Microsoft Corp.,U.S.A","institution_ids":["https://openalex.org/I1290206253","https://openalex.org/I4210105678"]},{"raw_affiliation_string":"Microsoft Corp., U.S.A","institution_ids":["https://openalex.org/I1290206253"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5033217035","display_name":"Issac Alphanso","orcid":null},"institutions":[{"id":"https://openalex.org/I4210105678","display_name":"Microsoft (Finland)","ror":"https://ror.org/01nehjf29","country_code":"FI","type":"company","lineage":["https://openalex.org/I1290206253","https://openalex.org/I4210105678"]},{"id":"https://openalex.org/I1290206253","display_name":"Microsoft (United States)","ror":"https://ror.org/00d0nc645","country_code":"US","type":"company","lineage":["https://openalex.org/I1290206253"]}],"countries":["FI","US"],"is_corresponding":false,"raw_author_name":"Issac Alphanso","raw_affiliation_strings":["Microsoft Corp.,U.S.A","Microsoft Corp., U.S.A"],"affiliations":[{"raw_affiliation_string":"Microsoft Corp.,U.S.A","institution_ids":["https://openalex.org/I1290206253","https://openalex.org/I4210105678"]},{"raw_affiliation_string":"Microsoft Corp., U.S.A","institution_ids":["https://openalex.org/I1290206253"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100365053","display_name":"Jinyu Li","orcid":"https://orcid.org/0000-0002-1089-9748"},"institutions":[{"id":"https://openalex.org/I4210105678","display_name":"Microsoft (Finland)","ror":"https://ror.org/01nehjf29","country_code":"FI","type":"company","lineage":["https://openalex.org/I1290206253","https://openalex.org/I4210105678"]},{"id":"https://openalex.org/I1290206253","display_name":"Microsoft (United States)","ror":"https://ror.org/00d0nc645","country_code":"US","type":"company","lineage":["https://openalex.org/I1290206253"]}],"countries":["FI","US"],"is_corresponding":false,"raw_author_name":"Jinyu Li","raw_affiliation_strings":["Microsoft Corp.,U.S.A","Microsoft Corp., U.S.A"],"affiliations":[{"raw_affiliation_string":"Microsoft Corp.,U.S.A","institution_ids":["https://openalex.org/I1290206253","https://openalex.org/I4210105678"]},{"raw_affiliation_string":"Microsoft Corp., U.S.A","institution_ids":["https://openalex.org/I1290206253"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5101928537","display_name":"Yifan Gong","orcid":"https://orcid.org/0000-0002-3912-097X"},"institutions":[{"id":"https://openalex.org/I4210105678","display_name":"Microsoft (Finland)","ror":"https://ror.org/01nehjf29","country_code":"FI","type":"company","lineage":["https://openalex.org/I1290206253","https://openalex.org/I4210105678"]},{"id":"https://openalex.org/I1290206253","display_name":"Microsoft (United States)","ror":"https://ror.org/00d0nc645","country_code":"US","type":"company","lineage":["https://openalex.org/I1290206253"]}],"countries":["FI","US"],"is_corresponding":false,"raw_author_name":"Yifan Gong","raw_affiliation_strings":["Microsoft Corp.,U.S.A","Microsoft Corp., U.S.A"],"affiliations":[{"raw_affiliation_string":"Microsoft Corp.,U.S.A","institution_ids":["https://openalex.org/I1290206253","https://openalex.org/I4210105678"]},{"raw_affiliation_string":"Microsoft Corp., U.S.A","institution_ids":["https://openalex.org/I1290206253"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":8,"corresponding_author_ids":["https://openalex.org/A5034136587"],"corresponding_institution_ids":["https://openalex.org/I1290206253","https://openalex.org/I4210105678"],"apc_list":null,"apc_paid":null,"fwci":0.3391,"has_fulltext":false,"cited_by_count":3,"citation_normalized_percentile":{"value":0.45028631,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":96},"biblio":{"volume":"abs/1611. 00068","issue":null,"first_page":"237","last_page":"244"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9991999864578247,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12031","display_name":"Speech and dialogue systems","score":0.9990000128746033,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8277156352996826},{"id":"https://openalex.org/keywords/normalization","display_name":"Normalization (sociology)","score":0.7565527558326721},{"id":"https://openalex.org/keywords/transformer","display_name":"Transformer","score":0.6240532398223877},{"id":"https://openalex.org/keywords/personalization","display_name":"Personalization","score":0.4936736524105072},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.46991148591041565},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.44384005665779114},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.4075682759284973},{"id":"https://openalex.org/keywords/voltage","display_name":"Voltage","score":0.07896247506141663}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8277156352996826},{"id":"https://openalex.org/C136886441","wikidata":"https://www.wikidata.org/wiki/Q926129","display_name":"Normalization (sociology)","level":2,"score":0.7565527558326721},{"id":"https://openalex.org/C66322947","wikidata":"https://www.wikidata.org/wiki/Q11658","display_name":"Transformer","level":3,"score":0.6240532398223877},{"id":"https://openalex.org/C183003079","wikidata":"https://www.wikidata.org/wiki/Q1000371","display_name":"Personalization","level":2,"score":0.4936736524105072},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.46991148591041565},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.44384005665779114},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4075682759284973},{"id":"https://openalex.org/C165801399","wikidata":"https://www.wikidata.org/wiki/Q25428","display_name":"Voltage","level":2,"score":0.07896247506141663},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C19165224","wikidata":"https://www.wikidata.org/wiki/Q23404","display_name":"Anthropology","level":1,"score":0.0},{"id":"https://openalex.org/C144024400","wikidata":"https://www.wikidata.org/wiki/Q21201","display_name":"Sociology","level":0,"score":0.0},{"id":"https://openalex.org/C136764020","wikidata":"https://www.wikidata.org/wiki/Q466","display_name":"World Wide Web","level":1,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/slt54892.2023.10022543","is_oa":false,"landing_page_url":"http://dx.doi.org/10.1109/slt54892.2023.10022543","pdf_url":null,"source":{"id":"https://openalex.org/S4363605953","display_name":"2022 IEEE Spoken Language Technology Workshop (SLT)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2022 IEEE Spoken Language Technology Workshop (SLT)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/16","score":0.41999998688697815,"display_name":"Peace, Justice and strong institutions"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":31,"referenced_works":["https://openalex.org/W27049869","https://openalex.org/W145178388","https://openalex.org/W197865384","https://openalex.org/W1507805082","https://openalex.org/W1828163288","https://openalex.org/W2164107060","https://openalex.org/W2401055968","https://openalex.org/W2402040300","https://openalex.org/W2551895583","https://openalex.org/W2553910756","https://openalex.org/W2749051922","https://openalex.org/W2908510526","https://openalex.org/W2914430521","https://openalex.org/W2963010571","https://openalex.org/W2963250244","https://openalex.org/W2963827914","https://openalex.org/W2975381464","https://openalex.org/W3016010032","https://openalex.org/W3095311338","https://openalex.org/W3160789530","https://openalex.org/W3161873870","https://openalex.org/W4283728058","https://openalex.org/W4295276571","https://openalex.org/W4385245566","https://openalex.org/W6605854414","https://openalex.org/W6638749077","https://openalex.org/W6729005282","https://openalex.org/W6729763630","https://openalex.org/W6748380982","https://openalex.org/W6757817989","https://openalex.org/W6768080748"],"related_works":["https://openalex.org/W2591697403","https://openalex.org/W2109940557","https://openalex.org/W2466832359","https://openalex.org/W2953716828","https://openalex.org/W2904857019","https://openalex.org/W2944728705","https://openalex.org/W3011538607","https://openalex.org/W2904022177","https://openalex.org/W4321441197","https://openalex.org/W2359348847"],"abstract_inverted_index":{"Automatic":[0],"Speech":[1],"Recognition":[2],"(ASR)":[3],"systems":[4,23],"typically":[5],"yield":[6],"output":[7],"in":[8,145],"lexical":[9,95],"form.":[10],"However,":[11],"humans":[12],"prefer":[13],"a":[14,89],"written":[15],"form":[16],"output.":[17],"To":[18],"bridge":[19],"this":[20,49,65],"gap,":[21],"ASR":[22],"usually":[24],"employ":[25],"Inverse":[26],"Text":[27],"Normalization":[28],"(ITN).":[29],"In":[30,64],"previous":[31],"works,":[32],"Weighted":[33],"Finite":[34],"State":[35],"Transducers":[36],"(WFST)":[37],"have":[38],"been":[39],"employed":[40],"to":[41,48,123,138],"do":[42],"ITN.":[43],"WFSTs":[44],"are":[45],"nicely":[46],"suited":[47],"task":[50],"but":[51],"their":[52],"size":[53,146],"and":[54,147],"run-time":[55],"costs":[56],"can":[57],"make":[58],"deployment":[59],"on":[60,119],"embedded":[61],"applications":[62],"challenging.":[63],"paper,":[66],"we":[67,113],"describe":[68],"the":[69,83,120,126,132],"development":[70],"of":[71,85],"an":[72,115],"on-device":[73],"ITN":[74,103,127,134],"system":[75,87],"that":[76,93,131],"is":[77,88],"streaming,":[78],"lightweight":[79],"&":[80],"accurate.":[81],"At":[82],"core":[84],"our":[86],"streaming":[90],"transformer":[91],"tagger,":[92],"tags":[94],"tokens":[96],"from":[97],"ASR.":[98],"The":[99],"tag":[100],"informs":[101],"which":[102],"category":[104],"might":[105],"be":[106],"applied,":[107],"if":[108],"at":[109],"all.":[110],"Following":[111],"that,":[112],"apply":[114],"ITN-category-specific":[116],"WFST,":[117],"only":[118],"tagged":[121],"text,":[122],"reliably":[124],"perform":[125],"conversion.":[128],"We":[129],"show":[130],"proposed":[133],"solution":[135],"performs":[136],"equivalent":[137],"strong":[139],"base-lines,":[140],"while":[141],"being":[142],"significantly":[143],"smaller":[144],"retaining":[148],"customization":[149],"capabilities.":[150]},"counts_by_year":[{"year":2024,"cited_by_count":1},{"year":2023,"cited_by_count":2}],"updated_date":"2025-12-24T23:09:58.560324","created_date":"2025-10-10T00:00:00"}
