{"id":"https://openalex.org/W4309679352","doi":"https://doi.org/10.1109/kse56063.2022.9953791","title":"Non-Standard Vietnamese Word Detection and Normalization for Text\u2013to\u2013Speech","display_name":"Non-Standard Vietnamese Word Detection and Normalization for Text\u2013to\u2013Speech","publication_year":2022,"publication_date":"2022-10-19","ids":{"openalex":"https://openalex.org/W4309679352","doi":"https://doi.org/10.1109/kse56063.2022.9953791"},"language":"en","primary_location":{"id":"doi:10.1109/kse56063.2022.9953791","is_oa":false,"landing_page_url":"https://doi.org/10.1109/kse56063.2022.9953791","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2022 14th International Conference on Knowledge and Systems Engineering (KSE)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5032380641","display_name":"Huu-Tien Dang","orcid":null},"institutions":[{"id":"https://openalex.org/I109689652","display_name":"FPT University","ror":"https://ror.org/03esj4g97","country_code":"VN","type":"education","lineage":["https://openalex.org/I109689652"]},{"id":"https://openalex.org/I67868205","display_name":"VNU University of Science","ror":"https://ror.org/05w54hk79","country_code":"VN","type":"education","lineage":["https://openalex.org/I177233841","https://openalex.org/I67868205"]}],"countries":["VN"],"is_corresponding":true,"raw_author_name":"Huu-Tien Dang","raw_affiliation_strings":["FPT Technology Research Institute, FPT University,Hanoi,Vietnam","FPT Technology Research Institute, FPT University, Hanoi, Vietnam","VNU University of Engineering and Technology, Hanoi, Vietnam"],"affiliations":[{"raw_affiliation_string":"FPT Technology Research Institute, FPT University,Hanoi,Vietnam","institution_ids":["https://openalex.org/I109689652"]},{"raw_affiliation_string":"FPT Technology Research Institute, FPT University, Hanoi, Vietnam","institution_ids":["https://openalex.org/I109689652"]},{"raw_affiliation_string":"VNU University of Engineering and Technology, Hanoi, Vietnam","institution_ids":["https://openalex.org/I67868205"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5081773521","display_name":"Thi-Hai-Yen Vuong","orcid":"https://orcid.org/0000-0002-8019-7178"},"institutions":[{"id":"https://openalex.org/I109689652","display_name":"FPT University","ror":"https://ror.org/03esj4g97","country_code":"VN","type":"education","lineage":["https://openalex.org/I109689652"]}],"countries":["VN"],"is_corresponding":false,"raw_author_name":"Thi-Hai-Yen Vuong","raw_affiliation_strings":["FPT Technology Research Institute, FPT University,Hanoi,Vietnam","FPT Technology Research Institute, FPT University, Hanoi, Vietnam"],"affiliations":[{"raw_affiliation_string":"FPT Technology Research Institute, FPT University,Hanoi,Vietnam","institution_ids":["https://openalex.org/I109689652"]},{"raw_affiliation_string":"FPT Technology Research Institute, FPT University, Hanoi, Vietnam","institution_ids":["https://openalex.org/I109689652"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5012773199","display_name":"Xuan-Hieu Phan","orcid":"https://orcid.org/0000-0002-7640-9190"},"institutions":[{"id":"https://openalex.org/I109689652","display_name":"FPT University","ror":"https://ror.org/03esj4g97","country_code":"VN","type":"education","lineage":["https://openalex.org/I109689652"]}],"countries":["VN"],"is_corresponding":false,"raw_author_name":"Xuan-Hieu Phan","raw_affiliation_strings":["FPT Technology Research Institute, FPT University,Hanoi,Vietnam","FPT Technology Research Institute, FPT University, Hanoi, Vietnam"],"affiliations":[{"raw_affiliation_string":"FPT Technology Research Institute, FPT University,Hanoi,Vietnam","institution_ids":["https://openalex.org/I109689652"]},{"raw_affiliation_string":"FPT Technology Research Institute, FPT University, Hanoi, Vietnam","institution_ids":["https://openalex.org/I109689652"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5032380641"],"corresponding_institution_ids":["https://openalex.org/I109689652","https://openalex.org/I67868205"],"apc_list":null,"apc_paid":null,"fwci":0.5305,"has_fulltext":false,"cited_by_count":4,"citation_normalized_percentile":{"value":0.71454573,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":96},"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"6"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9991999864578247,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9980000257492065,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/crfs","display_name":"CRFS","score":0.7383119463920593},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.6989256143569946},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6966476440429688},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.6820070743560791},{"id":"https://openalex.org/keywords/normalization","display_name":"Normalization (sociology)","score":0.6042424440383911},{"id":"https://openalex.org/keywords/sentence","display_name":"Sentence","score":0.5619957447052002},{"id":"https://openalex.org/keywords/conditional-random-field","display_name":"Conditional random field","score":0.5097143054008484},{"id":"https://openalex.org/keywords/clef","display_name":"Clef","score":0.5091521143913269},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.5015699863433838},{"id":"https://openalex.org/keywords/vietnamese","display_name":"Vietnamese","score":0.4983506202697754},{"id":"https://openalex.org/keywords/lexicon","display_name":"Lexicon","score":0.4677479565143585},{"id":"https://openalex.org/keywords/linguistics","display_name":"Linguistics","score":0.10427960753440857},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.07850638031959534}],"concepts":[{"id":"https://openalex.org/C2775953691","wikidata":"https://www.wikidata.org/wiki/Q5013874","display_name":"CRFS","level":3,"score":0.7383119463920593},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6989256143569946},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6966476440429688},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.6820070743560791},{"id":"https://openalex.org/C136886441","wikidata":"https://www.wikidata.org/wiki/Q926129","display_name":"Normalization (sociology)","level":2,"score":0.6042424440383911},{"id":"https://openalex.org/C2777530160","wikidata":"https://www.wikidata.org/wiki/Q41796","display_name":"Sentence","level":2,"score":0.5619957447052002},{"id":"https://openalex.org/C152565575","wikidata":"https://www.wikidata.org/wiki/Q1124538","display_name":"Conditional random field","level":2,"score":0.5097143054008484},{"id":"https://openalex.org/C107763842","wikidata":"https://www.wikidata.org/wiki/Q181040","display_name":"Clef","level":3,"score":0.5091521143913269},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.5015699863433838},{"id":"https://openalex.org/C103621254","wikidata":"https://www.wikidata.org/wiki/Q9199","display_name":"Vietnamese","level":2,"score":0.4983506202697754},{"id":"https://openalex.org/C2778121359","wikidata":"https://www.wikidata.org/wiki/Q8096","display_name":"Lexicon","level":2,"score":0.4677479565143585},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.10427960753440857},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.07850638031959534},{"id":"https://openalex.org/C144024400","wikidata":"https://www.wikidata.org/wiki/Q21201","display_name":"Sociology","level":0,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0},{"id":"https://openalex.org/C19165224","wikidata":"https://www.wikidata.org/wiki/Q23404","display_name":"Anthropology","level":1,"score":0.0},{"id":"https://openalex.org/C187736073","wikidata":"https://www.wikidata.org/wiki/Q2920921","display_name":"Management","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/kse56063.2022.9953791","is_oa":false,"landing_page_url":"https://doi.org/10.1109/kse56063.2022.9953791","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2022 14th International Conference on Knowledge and Systems Engineering (KSE)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"score":0.7599999904632568,"id":"https://metadata.un.org/sdg/4","display_name":"Quality Education"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":26,"referenced_works":["https://openalex.org/W1499275966","https://openalex.org/W1522301498","https://openalex.org/W2063178951","https://openalex.org/W2064675550","https://openalex.org/W2147800946","https://openalex.org/W2147880316","https://openalex.org/W2164107060","https://openalex.org/W2164280277","https://openalex.org/W2252192936","https://openalex.org/W2293206651","https://openalex.org/W2747175885","https://openalex.org/W2806253224","https://openalex.org/W2945656493","https://openalex.org/W2962902328","https://openalex.org/W2964199361","https://openalex.org/W2969257556","https://openalex.org/W3095073040","https://openalex.org/W3106295233","https://openalex.org/W3154516348","https://openalex.org/W3161730088","https://openalex.org/W3176889427","https://openalex.org/W3194932072","https://openalex.org/W3209565016","https://openalex.org/W6631190155","https://openalex.org/W6682082992","https://openalex.org/W6800762109"],"related_works":["https://openalex.org/W2356597680","https://openalex.org/W50079190","https://openalex.org/W182104056","https://openalex.org/W2111726165","https://openalex.org/W2011251309","https://openalex.org/W2511246383","https://openalex.org/W3108423214","https://openalex.org/W2796133761","https://openalex.org/W3088215229","https://openalex.org/W2184553228"],"abstract_inverted_index":{"Converting":[0],"written":[1],"texts":[2],"into":[3,50,97],"their":[4,98],"spoken":[5],"forms":[6],"is":[7,81],"an":[8,18],"essential":[9],"problem":[10],"in":[11],"any":[12],"text-to-speech":[13],"(TTS)":[14],"systems.":[15],"However,":[16],"building":[17],"effective":[19],"text":[20],"normalization":[21,70],"solution":[22],"for":[23,107],"a":[24,67,78,91,120,138],"real-world":[25],"TTS":[26],"system":[27],"face":[28],"two":[29],"main":[30],"challenges:":[31],"(1)":[32],"the":[33,133,147,158,163,171,180,189],"semantic":[34],"ambiguity":[35],"of":[36,157,170,186],"non-standard":[37],"words":[38],"(NSWs),":[39],"e.g.,":[40],"numbers,":[41],"dates,":[42],"ranges,":[43],"scores,":[44],"abbreviations,":[45],"and":[46,59,116,151,173,204,209],"(2)":[47],"transforming":[48],"NSWs":[49,96],"pronounceable":[51],"syllables,":[52],"such":[53],"as":[54],"URL,":[55,150],"email":[56],"address,":[57],"hashtag,":[58,148],"contact":[60,152],"name.":[61,153],"In":[62,132],"this":[63],"paper,":[64],"we":[65,136],"propose":[66,137],"new":[68],"two-phase":[69],"approach":[71,194],"to":[72,83,144],"deal":[73],"with":[74,188,202,206,212],"these":[75],"challenges.":[76],"First,":[77],"model-based":[79],"tagger":[80],"designed":[82],"detect":[84],"NSWs.":[85],"Then,":[86],"depending":[87],"on":[88,119],"NSW":[89,108],"types,":[90],"rule-based":[92],"normalizer":[93],"expands":[94],"those":[95],"final":[99],"verbal":[100],"forms.":[101],"We":[102],"conducted":[103],"three":[104],"empirical":[105],"experiments":[106],"detection":[109],"using":[110],"Conditional":[111],"Random":[112],"Fields":[113],"(CRFs),":[114],"BiLSTM-CNN-CRF,":[115],"BERT-BiGRU-CRF":[117,190,213],"models":[118,175],"manually":[121],"annotated":[122],"dataset":[123],"including":[124],"5819":[125],"sentences":[126],"extracted":[127],"from":[128],"Vietnamese":[129],"news":[130],"articles.":[131],"second":[134],"phase,":[135],"forward":[139],"lexicon-based":[140],"maximum":[141],"matching":[142],"algorithm":[143],"split":[145],"down":[146],"email,":[149],"The":[154],"experimental":[155],"results":[156],"tagging":[159],"phase":[160],"show":[161],"that":[162],"average":[164],"F":[165,182],"<inf":[166,183],"xmlns:mml=\"http://www.w3.org/1998/Math/MathML\"":[167,184],"xmlns:xlink=\"http://www.w3.org/1999/xlink\">1</inf>":[168,185],"scores":[169],"BiLSTM-CNN-CRF":[172],"CRF":[174,203],"are":[176],"above":[177],"90.00%,":[178],"reaching":[179],"highest":[181],"95.00%":[187],"model.":[191],"Overall,":[192],"our":[193],"has":[195],"low":[196],"sentence":[197],"error":[198],"rates,":[199],"at":[200],"8.15%":[201],"7.11%":[205],"BiLSTM-CNNCRF":[207],"taggers,":[208],"only":[210],"6.67%":[211],"tagger.":[214]},"counts_by_year":[{"year":2025,"cited_by_count":1},{"year":2024,"cited_by_count":1},{"year":2023,"cited_by_count":2}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
