{"id":"https://openalex.org/W4411403559","doi":"https://doi.org/10.1145/3744341","title":"Exploring the Effectiveness of Pre-training Language Models with Incorporation of Diglossia for Hong Kong Content","display_name":"Exploring the Effectiveness of Pre-training Language Models with Incorporation of Diglossia for Hong Kong Content","publication_year":2025,"publication_date":"2025-06-18","ids":{"openalex":"https://openalex.org/W4411403559","doi":"https://doi.org/10.1145/3744341"},"language":"en","primary_location":{"id":"doi:10.1145/3744341","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3744341","pdf_url":null,"source":{"id":"https://openalex.org/S4306421405","display_name":"ACM Transactions on Asian and Low-Resource Language Information Processing","issn_l":"2375-4699","issn":["2375-4699","2375-4702"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM Transactions on Asian and Low-Resource Language Information Processing","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5118443924","display_name":"Yiu Cheong Yung","orcid":"https://orcid.org/0009-0009-0979-4509"},"institutions":[{"id":"https://openalex.org/I91807558","display_name":"National Cheng Kung University","ror":"https://ror.org/01b8kcc49","country_code":"TW","type":"education","lineage":["https://openalex.org/I91807558"]}],"countries":["TW"],"is_corresponding":true,"raw_author_name":"Yiu Cheong Yung","raw_affiliation_strings":["National Cheng Kung University","National Cheng Kung University, Tainan, Taiwan"],"raw_orcid":"https://orcid.org/0009-0009-0979-4509","affiliations":[{"raw_affiliation_string":"National Cheng Kung University","institution_ids":["https://openalex.org/I91807558"]},{"raw_affiliation_string":"National Cheng Kung University, Tainan, Taiwan","institution_ids":["https://openalex.org/I91807558"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5040237383","display_name":"Ying-Jia Lin","orcid":"https://orcid.org/0000-0003-4347-0232"},"institutions":[{"id":"https://openalex.org/I173093425","display_name":"Chang Gung University","ror":"https://ror.org/00d80zx46","country_code":"TW","type":"education","lineage":["https://openalex.org/I173093425"]},{"id":"https://openalex.org/I91807558","display_name":"National Cheng Kung University","ror":"https://ror.org/01b8kcc49","country_code":"TW","type":"education","lineage":["https://openalex.org/I91807558"]}],"countries":["TW"],"is_corresponding":false,"raw_author_name":"Ying-Jia Lin","raw_affiliation_strings":["Department of Artificial Intelligence, Chang Gung University","National Cheng Kung University","National Cheng Kung University, Tainan, Taiwan and Department of Artificial Intelligence, Chang Gung University, Taoyuan, Taiwan"],"raw_orcid":"https://orcid.org/0000-0003-4347-0232","affiliations":[{"raw_affiliation_string":"Department of Artificial Intelligence, Chang Gung University","institution_ids":["https://openalex.org/I173093425"]},{"raw_affiliation_string":"National Cheng Kung University","institution_ids":["https://openalex.org/I91807558"]},{"raw_affiliation_string":"National Cheng Kung University, Tainan, Taiwan and Department of Artificial Intelligence, Chang Gung University, Taoyuan, Taiwan","institution_ids":["https://openalex.org/I91807558","https://openalex.org/I173093425"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5101898313","display_name":"Hung\u2010Yu Kao","orcid":"https://orcid.org/0000-0002-8890-8544"},"institutions":[{"id":"https://openalex.org/I25846049","display_name":"National Tsing Hua University","ror":"https://ror.org/00zdnkx70","country_code":"TW","type":"education","lineage":["https://openalex.org/I25846049"]},{"id":"https://openalex.org/I91807558","display_name":"National Cheng Kung University","ror":"https://ror.org/01b8kcc49","country_code":"TW","type":"education","lineage":["https://openalex.org/I91807558"]}],"countries":["TW"],"is_corresponding":false,"raw_author_name":"Hung-Yu Kao","raw_affiliation_strings":["Department of Computer Science, National Tsing Hua University","National Cheng Kung University","National Cheng Kung University, Tainan, Taiwan and Department of Computer Science, National Tsing Hua University, Hsinchu, Taiwan"],"raw_orcid":"https://orcid.org/0000-0002-8890-8544","affiliations":[{"raw_affiliation_string":"Department of Computer Science, National Tsing Hua University","institution_ids":["https://openalex.org/I25846049"]},{"raw_affiliation_string":"National Cheng Kung University","institution_ids":["https://openalex.org/I91807558"]},{"raw_affiliation_string":"National Cheng Kung University, Tainan, Taiwan and Department of Computer Science, National Tsing Hua University, Hsinchu, Taiwan","institution_ids":["https://openalex.org/I25846049","https://openalex.org/I91807558"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5118443924"],"corresponding_institution_ids":["https://openalex.org/I91807558"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.06229313,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":"24","issue":"7","first_page":"1","last_page":"16"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9994999766349792,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9994999766349792,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9994999766349792,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9819999933242798,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/diglossia","display_name":"Diglossia","score":0.932919979095459},{"id":"https://openalex.org/keywords/linguistics","display_name":"Linguistics","score":0.48760786652565},{"id":"https://openalex.org/keywords/content","display_name":"Content (measure theory)","score":0.4703616797924042},{"id":"https://openalex.org/keywords/psychology","display_name":"Psychology","score":0.44358888268470764},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.3527296185493469},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.13321438431739807},{"id":"https://openalex.org/keywords/neuroscience-of-multilingualism","display_name":"Neuroscience of multilingualism","score":0.13305673003196716},{"id":"https://openalex.org/keywords/philosophy","display_name":"Philosophy","score":0.050619423389434814}],"concepts":[{"id":"https://openalex.org/C504331141","wikidata":"https://www.wikidata.org/wiki/Q59203","display_name":"Diglossia","level":3,"score":0.932919979095459},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.48760786652565},{"id":"https://openalex.org/C2778152352","wikidata":"https://www.wikidata.org/wiki/Q5165061","display_name":"Content (measure theory)","level":2,"score":0.4703616797924042},{"id":"https://openalex.org/C15744967","wikidata":"https://www.wikidata.org/wiki/Q9418","display_name":"Psychology","level":0,"score":0.44358888268470764},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.3527296185493469},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.13321438431739807},{"id":"https://openalex.org/C49876356","wikidata":"https://www.wikidata.org/wiki/Q7002651","display_name":"Neuroscience of multilingualism","level":2,"score":0.13305673003196716},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.050619423389434814},{"id":"https://openalex.org/C134306372","wikidata":"https://www.wikidata.org/wiki/Q7754","display_name":"Mathematical analysis","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3744341","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3744341","pdf_url":null,"source":{"id":"https://openalex.org/S4306421405","display_name":"ACM Transactions on Asian and Low-Resource Language Information Processing","issn_l":"2375-4699","issn":["2375-4699","2375-4702"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM Transactions on Asian and Low-Resource Language Information Processing","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[{"score":0.7699999809265137,"id":"https://metadata.un.org/sdg/4","display_name":"Quality Education"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":9,"referenced_works":["https://openalex.org/W2806081754","https://openalex.org/W2911227954","https://openalex.org/W2962739339","https://openalex.org/W2963341956","https://openalex.org/W2965373594","https://openalex.org/W2979826702","https://openalex.org/W3035390927","https://openalex.org/W3212368439","https://openalex.org/W4294152847"],"related_works":["https://openalex.org/W4391375266","https://openalex.org/W2899084033","https://openalex.org/W2748952813","https://openalex.org/W2014455571","https://openalex.org/W2889646846","https://openalex.org/W2021715146","https://openalex.org/W2049578483","https://openalex.org/W2351442623","https://openalex.org/W2241677070","https://openalex.org/W2605705372"],"abstract_inverted_index":{"In":[0],"this":[1,56],"article,":[2],"we":[3],"present":[4],"our":[5,88],"works":[6],"to":[7,44,92],"create":[8],"the":[9,18,23,46,58,63,67],"first":[10,59],"Hong":[11,33,51,97],"Kong":[12,52,98],"content-based":[13],"public":[14],"pre-training":[15,38,80],"dataset":[16,39,70],"and":[17,55,90],"experiments":[19],"which":[20],"resulted":[21],"in":[22,32,96],"creation":[24,36,71],"of":[25,37,48],"ELECTRA-based":[26],"models":[27,91],"for":[28,42],"commonly":[29],"used":[30],"languages":[31],"Kong.":[34],"The":[35],"is":[40,57],"required":[41],"us":[43],"study":[45,61],"effect":[47,64],"diglossia":[49,78],"on":[50,62],"language":[53],"model,":[54],"ever":[60],"starting":[65],"all":[66],"way":[68],"from":[69,79],"phase.":[72],"Our":[73],"experiment":[74],"shows":[75],"that":[76],"removing":[77],"data":[81,89],"hurts":[82],"model":[83],"performance.":[84],"We":[85],"will":[86],"release":[87],"encourage":[93],"future":[94],"studies":[95],"languages.":[99],"1":[100]},"counts_by_year":[],"updated_date":"2026-05-21T06:26:12.895304","created_date":"2025-10-10T00:00:00"}
