{"id":"https://openalex.org/W4200144452","doi":"https://doi.org/10.1145/3483524","title":"A Statistical Language Model for Pre-Trained Sequence Labeling: A Case Study on Vietnamese","display_name":"A Statistical Language Model for Pre-Trained Sequence Labeling: A Case Study on Vietnamese","publication_year":2021,"publication_date":"2021-12-13","ids":{"openalex":"https://openalex.org/W4200144452","doi":"https://doi.org/10.1145/3483524"},"language":"en","primary_location":{"id":"doi:10.1145/3483524","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3483524","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3483524","source":{"id":"https://openalex.org/S4306421405","display_name":"ACM Transactions on Asian and Low-Resource Language Information Processing","issn_l":"2375-4699","issn":["2375-4699","2375-4702"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM Transactions on Asian and Low-Resource Language Information Processing","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"bronze","oa_url":"https://dl.acm.org/doi/pdf/10.1145/3483524","any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5084117790","display_name":"Xianwen Liao","orcid":"https://orcid.org/0000-0001-5755-1944"},"institutions":[{"id":"https://openalex.org/I5343935","display_name":"Guilin University of Electronic Technology","ror":"https://ror.org/05arjae42","country_code":"CN","type":"education","lineage":["https://openalex.org/I5343935"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Xianwen Liao","raw_affiliation_strings":["Guilin University of Electronic Technology, Guilin, Guangxi, China"],"affiliations":[{"raw_affiliation_string":"Guilin University of Electronic Technology, Guilin, Guangxi, China","institution_ids":["https://openalex.org/I5343935"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101669174","display_name":"Yongzhong Huang","orcid":"https://orcid.org/0000-0002-1620-3107"},"institutions":[{"id":"https://openalex.org/I5343935","display_name":"Guilin University of Electronic Technology","ror":"https://ror.org/05arjae42","country_code":"CN","type":"education","lineage":["https://openalex.org/I5343935"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yongzhong Huang","raw_affiliation_strings":["Guilin University of Electronic Technology, Guilin, Guangxi, China"],"affiliations":[{"raw_affiliation_string":"Guilin University of Electronic Technology, Guilin, Guangxi, China","institution_ids":["https://openalex.org/I5343935"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5025314679","display_name":"Peng Yang","orcid":"https://orcid.org/0000-0002-1184-8117"},"institutions":[{"id":"https://openalex.org/I5343935","display_name":"Guilin University of Electronic Technology","ror":"https://ror.org/05arjae42","country_code":"CN","type":"education","lineage":["https://openalex.org/I5343935"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Peng Yang","raw_affiliation_strings":["Guilin University of Electronic Technology, Guilin, Guangxi, China"],"affiliations":[{"raw_affiliation_string":"Guilin University of Electronic Technology, Guilin, Guangxi, China","institution_ids":["https://openalex.org/I5343935"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5100333436","display_name":"Lei Chen","orcid":"https://orcid.org/0000-0002-0625-4808"},"institutions":[{"id":"https://openalex.org/I5343935","display_name":"Guilin University of Electronic Technology","ror":"https://ror.org/05arjae42","country_code":"CN","type":"education","lineage":["https://openalex.org/I5343935"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Lei Chen","raw_affiliation_strings":["Guilin University of Electronic Technology, Guilin, Guangxi, China"],"affiliations":[{"raw_affiliation_string":"Guilin University of Electronic Technology, Guilin, Guangxi, China","institution_ids":["https://openalex.org/I5343935"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5084117790"],"corresponding_institution_ids":["https://openalex.org/I5343935"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":true,"cited_by_count":0,"citation_normalized_percentile":{"value":0.1902524,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":"21","issue":"3","first_page":"1","last_page":"21"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9939000010490417,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/sequence-labeling","display_name":"Sequence labeling","score":0.8636274337768555},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8277449607849121},{"id":"https://openalex.org/keywords/word","display_name":"Word (group theory)","score":0.669354259967804},{"id":"https://openalex.org/keywords/vietnamese","display_name":"Vietnamese","score":0.6512935757637024},{"id":"https://openalex.org/keywords/segmentation","display_name":"Segmentation","score":0.6453977823257446},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.6402838230133057},{"id":"https://openalex.org/keywords/language-model","display_name":"Language model","score":0.6336132884025574},{"id":"https://openalex.org/keywords/sequence","display_name":"Sequence (biology)","score":0.5939465165138245},{"id":"https://openalex.org/keywords/text-segmentation","display_name":"Text segmentation","score":0.5462827086448669},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.5406365990638733},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.46471866965293884},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.3334161043167114},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.10607311129570007}],"concepts":[{"id":"https://openalex.org/C35639132","wikidata":"https://www.wikidata.org/wiki/Q7452468","display_name":"Sequence labeling","level":3,"score":0.8636274337768555},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8277449607849121},{"id":"https://openalex.org/C90805587","wikidata":"https://www.wikidata.org/wiki/Q10944557","display_name":"Word (group theory)","level":2,"score":0.669354259967804},{"id":"https://openalex.org/C103621254","wikidata":"https://www.wikidata.org/wiki/Q9199","display_name":"Vietnamese","level":2,"score":0.6512935757637024},{"id":"https://openalex.org/C89600930","wikidata":"https://www.wikidata.org/wiki/Q1423946","display_name":"Segmentation","level":2,"score":0.6453977823257446},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6402838230133057},{"id":"https://openalex.org/C137293760","wikidata":"https://www.wikidata.org/wiki/Q3621696","display_name":"Language model","level":2,"score":0.6336132884025574},{"id":"https://openalex.org/C2778112365","wikidata":"https://www.wikidata.org/wiki/Q3511065","display_name":"Sequence (biology)","level":2,"score":0.5939465165138245},{"id":"https://openalex.org/C98501671","wikidata":"https://www.wikidata.org/wiki/Q1948408","display_name":"Text segmentation","level":3,"score":0.5462827086448669},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.5406365990638733},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.46471866965293884},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.3334161043167114},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.10607311129570007},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C54355233","wikidata":"https://www.wikidata.org/wiki/Q7162","display_name":"Genetics","level":1,"score":0.0},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.0},{"id":"https://openalex.org/C2524010","wikidata":"https://www.wikidata.org/wiki/Q8087","display_name":"Geometry","level":1,"score":0.0},{"id":"https://openalex.org/C187736073","wikidata":"https://www.wikidata.org/wiki/Q2920921","display_name":"Management","level":1,"score":0.0},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3483524","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3483524","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3483524","source":{"id":"https://openalex.org/S4306421405","display_name":"ACM Transactions on Asian and Low-Resource Language Information Processing","issn_l":"2375-4699","issn":["2375-4699","2375-4702"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM Transactions on Asian and Low-Resource Language Information Processing","raw_type":"journal-article"}],"best_oa_location":{"id":"doi:10.1145/3483524","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3483524","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3483524","source":{"id":"https://openalex.org/S4306421405","display_name":"ACM Transactions on Asian and Low-Resource Language Information Processing","issn_l":"2375-4699","issn":["2375-4699","2375-4702"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM Transactions on Asian and Low-Resource Language Information Processing","raw_type":"journal-article"},"sustainable_development_goals":[{"display_name":"Quality Education","score":0.7799999713897705,"id":"https://metadata.un.org/sdg/4"}],"awards":[{"id":"https://openalex.org/G1231421488","display_name":null,"funder_award_id":"under","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G1477544716","display_name":null,"funder_award_id":"Guangdong","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G2087396116","display_name":null,"funder_award_id":"China","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G3317480652","display_name":null,"funder_award_id":"Science","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G391238517","display_name":null,"funder_award_id":", and","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G4689664160","display_name":null,"funder_award_id":"61066008 and 61862011","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G4716118765","display_name":null,"funder_award_id":"61862011","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G5939423041","display_name":null,"funder_award_id":"Technology","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G5994120800","display_name":null,"funder_award_id":"Natural","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"}],"has_content":{"pdf":true,"grobid_xml":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4200144452.pdf","grobid_xml":"https://content.openalex.org/works/W4200144452.grobid-xml"},"referenced_works_count":38,"referenced_works":["https://openalex.org/W1524281572","https://openalex.org/W1757859293","https://openalex.org/W2064675550","https://openalex.org/W2250539671","https://openalex.org/W2250739653","https://openalex.org/W2296283641","https://openalex.org/W2358307482","https://openalex.org/W2518150831","https://openalex.org/W2564413773","https://openalex.org/W2782238183","https://openalex.org/W2911855264","https://openalex.org/W2945210837","https://openalex.org/W2962739339","https://openalex.org/W2962902328","https://openalex.org/W2962950859","https://openalex.org/W2963341956","https://openalex.org/W2963628345","https://openalex.org/W2964093505","https://openalex.org/W2964352165","https://openalex.org/W2971254483","https://openalex.org/W2996160789","https://openalex.org/W2997124358","https://openalex.org/W2997792775","https://openalex.org/W2997918300","https://openalex.org/W2997919746","https://openalex.org/W2998017003","https://openalex.org/W2998153464","https://openalex.org/W2998320111","https://openalex.org/W2998448492","https://openalex.org/W2998456908","https://openalex.org/W2998566943","https://openalex.org/W2998635301","https://openalex.org/W3009619352","https://openalex.org/W3104453603","https://openalex.org/W3145501851","https://openalex.org/W3169250374","https://openalex.org/W4365799947","https://openalex.org/W6683955732"],"related_works":["https://openalex.org/W2946128423","https://openalex.org/W2944691285","https://openalex.org/W2918555272","https://openalex.org/W2393940967","https://openalex.org/W2159591557","https://openalex.org/W2346578824","https://openalex.org/W2366925922","https://openalex.org/W2115592387","https://openalex.org/W2905950556","https://openalex.org/W2385598138"],"abstract_inverted_index":{"By":[0,75],"defining":[1],"the":[2,43,56,79,106,125,135,143],"computable":[3],"word":[4,50,85,138],"segmentation":[5,51,139],"unit":[6],"and":[7,37,62,89,120,153,158],"studying":[8],"its":[9,38],"probability":[10],"characteristics,":[11],"we":[12,67],"establish":[13],"an":[14,34],"unsupervised":[15,157],"statistical":[16],"language":[17],"model":[18],"(SLM)":[19],"for":[20],"a":[21,69,123],"new":[22],"pre-trained":[23],"sequence":[24,81,109,129],"labeling":[25,82,110,130],"framework":[26,131],"in":[27,53],"this":[28],"article.":[29],"The":[30,96],"proposed":[31],"SLM":[32,77,102,146],"is":[33,40,132,155],"optimization":[35],"model,":[36],"objective":[39],"to":[41,150,160],"maximize":[42],"total":[44],"binding":[45],"force":[46],"of":[47,58,108,117,127],"all":[48],"candidate":[49],"units":[52],"sentences":[54],"under":[55],"condition":[57],"no":[59,148],"annotated":[60],"datasets":[61],"vocabularies.":[63],"To":[64],"solve":[65],"SLM,":[66],"design":[68],"recursive":[70],"divide-and-conquer":[71],"dynamic":[72],"programming":[73],"algorithm.":[74],"integrating":[76],"with":[78],"popular":[80],"models,":[83],"Vietnamese":[84,137],"segmentation,":[86],"part-of-speech":[87],"tagging":[88],"named":[90],"entity":[91],"recognition":[92],"experiments":[93],"are":[94],"performed.":[95],"experimental":[97],"results":[98],"show":[99],"that":[100],"our":[101,128],"can":[103],"effectively":[104],"promote":[105],"performance":[107,126],"tasks.":[111],"Just":[112],"using":[113,122],"less":[114],"than":[115,134],"10%":[116],"training":[118],"data":[119],"without":[121],"dictionary,":[124],"better":[133],"state-of-the-art":[136],"toolkit":[140],"VnCoreNLP":[141],"on":[142],"cross-dataset":[144],"test.":[145],"has":[147,167],"hyper-parameter":[149],"be":[151],"tuned,":[152],"it":[154,166],"completely":[156],"applicable":[159],"any":[161],"other":[162],"analytic":[163],"language.":[164],"Thus,":[165],"good":[168],"domain":[169],"adaptability.":[170]},"counts_by_year":[],"updated_date":"2026-04-10T15:06:20.359241","created_date":"2025-10-10T00:00:00"}
