{"id":"https://openalex.org/W4408981324","doi":"https://doi.org/10.1007/s13278-025-01444-9","title":"Pre-trained language model for code-mixed text in Indonesian, Javanese, and English using transformer","display_name":"Pre-trained language model for code-mixed text in Indonesian, Javanese, and English using transformer","publication_year":2025,"publication_date":"2025-03-28","ids":{"openalex":"https://openalex.org/W4408981324","doi":"https://doi.org/10.1007/s13278-025-01444-9"},"language":"en","primary_location":{"id":"doi:10.1007/s13278-025-01444-9","is_oa":true,"landing_page_url":"https://doi.org/10.1007/s13278-025-01444-9","pdf_url":"https://link.springer.com/content/pdf/10.1007/s13278-025-01444-9.pdf","source":{"id":"https://openalex.org/S2764891196","display_name":"Social Network Analysis and Mining","issn_l":"1869-5450","issn":["1869-5450","1869-5469"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319900","host_organization_name":"Springer Science+Business Media","host_organization_lineage":["https://openalex.org/P4310319900","https://openalex.org/P4310319965"],"host_organization_lineage_names":["Springer Science+Business Media","Springer Nature"],"type":"journal"},"license":"cc-by-nc-nd","license_id":"https://openalex.org/licenses/cc-by-nc-nd","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Social Network Analysis and Mining","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"hybrid","oa_url":"https://link.springer.com/content/pdf/10.1007/s13278-025-01444-9.pdf","any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5007014627","display_name":"Ahmad Fathan Hidayatullah","orcid":"https://orcid.org/0000-0002-3755-2648"},"institutions":[{"id":"https://openalex.org/I189462010","display_name":"Universiti Brunei Darussalam","ror":"https://ror.org/02qnf3n86","country_code":"BN","type":"education","lineage":["https://openalex.org/I189462010"]},{"id":"https://openalex.org/I35427347","display_name":"Islamic University of Indonesia","ror":"https://ror.org/000pmrk50","country_code":"ID","type":"education","lineage":["https://openalex.org/I35427347"]}],"countries":["BN","ID"],"is_corresponding":true,"raw_author_name":"Ahmad Fathan Hidayatullah","raw_affiliation_strings":["Department of Informatics, Universitas Islam Indonesia, Jalan Kaliurang km 14.5, Sleman, Yogyakarta, 55584, Indonesia","School of Digital Science, Universiti Brunei Darussalam, Jalan Tungku Link, Gadong, Bandar Seri Begawan, BE1410, Brunei Darussalam"],"affiliations":[{"raw_affiliation_string":"Department of Informatics, Universitas Islam Indonesia, Jalan Kaliurang km 14.5, Sleman, Yogyakarta, 55584, Indonesia","institution_ids":["https://openalex.org/I35427347"]},{"raw_affiliation_string":"School of Digital Science, Universiti Brunei Darussalam, Jalan Tungku Link, Gadong, Bandar Seri Begawan, BE1410, Brunei Darussalam","institution_ids":["https://openalex.org/I189462010"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5039234629","display_name":"Rosyzie Anna Awg Haji Mohd Apong","orcid":"https://orcid.org/0009-0002-3971-9314"},"institutions":[{"id":"https://openalex.org/I189462010","display_name":"Universiti Brunei Darussalam","ror":"https://ror.org/02qnf3n86","country_code":"BN","type":"education","lineage":["https://openalex.org/I189462010"]}],"countries":["BN"],"is_corresponding":false,"raw_author_name":"Rosyzie Anna Apong","raw_affiliation_strings":["School of Digital Science, Universiti Brunei Darussalam, Jalan Tungku Link, Gadong, Bandar Seri Begawan, BE1410, Brunei Darussalam"],"affiliations":[{"raw_affiliation_string":"School of Digital Science, Universiti Brunei Darussalam, Jalan Tungku Link, Gadong, Bandar Seri Begawan, BE1410, Brunei Darussalam","institution_ids":["https://openalex.org/I189462010"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5110190122","display_name":"Daphne Teck Ching Lai","orcid":null},"institutions":[{"id":"https://openalex.org/I189462010","display_name":"Universiti Brunei Darussalam","ror":"https://ror.org/02qnf3n86","country_code":"BN","type":"education","lineage":["https://openalex.org/I189462010"]}],"countries":["BN"],"is_corresponding":false,"raw_author_name":"Daphne Teck Ching Lai","raw_affiliation_strings":["School of Digital Science, Universiti Brunei Darussalam, Jalan Tungku Link, Gadong, Bandar Seri Begawan, BE1410, Brunei Darussalam"],"affiliations":[{"raw_affiliation_string":"School of Digital Science, Universiti Brunei Darussalam, Jalan Tungku Link, Gadong, Bandar Seri Begawan, BE1410, Brunei Darussalam","institution_ids":["https://openalex.org/I189462010"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5056446470","display_name":"Atika Qazi","orcid":"https://orcid.org/0000-0002-3565-6355"},"institutions":[{"id":"https://openalex.org/I189462010","display_name":"Universiti Brunei Darussalam","ror":"https://ror.org/02qnf3n86","country_code":"BN","type":"education","lineage":["https://openalex.org/I189462010"]}],"countries":["BN"],"is_corresponding":false,"raw_author_name":"Atika Qazi","raw_affiliation_strings":["Centre for Lifelong Learning, Universiti Brunei Darussalam, Jalan Tungku Link, Gadong, Bandar Seri Begawan, BE1410, Brunei Darussalam"],"affiliations":[{"raw_affiliation_string":"Centre for Lifelong Learning, Universiti Brunei Darussalam, Jalan Tungku Link, Gadong, Bandar Seri Begawan, BE1410, Brunei Darussalam","institution_ids":["https://openalex.org/I189462010"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5007014627"],"corresponding_institution_ids":["https://openalex.org/I189462010","https://openalex.org/I35427347"],"apc_list":{"value":2390,"currency":"EUR","value_usd":2990},"apc_paid":{"value":2390,"currency":"EUR","value_usd":2990},"fwci":16.6951,"has_fulltext":false,"cited_by_count":7,"citation_normalized_percentile":{"value":0.98815432,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":98,"max":99},"biblio":{"volume":"15","issue":"1","first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9995999932289124,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9750999808311462,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/indonesian","display_name":"Indonesian","score":0.8621525764465332},{"id":"https://openalex.org/keywords/transformer","display_name":"Transformer","score":0.6635022163391113},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6366944313049316},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.6205874681472778},{"id":"https://openalex.org/keywords/code-switching","display_name":"Code-switching","score":0.5237786173820496},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.48594796657562256},{"id":"https://openalex.org/keywords/linguistics","display_name":"Linguistics","score":0.4638400673866272},{"id":"https://openalex.org/keywords/engineering","display_name":"Engineering","score":0.10153523087501526},{"id":"https://openalex.org/keywords/electrical-engineering","display_name":"Electrical engineering","score":0.09913462400436401},{"id":"https://openalex.org/keywords/philosophy","display_name":"Philosophy","score":0.05721369385719299},{"id":"https://openalex.org/keywords/voltage","display_name":"Voltage","score":0.050423890352249146}],"concepts":[{"id":"https://openalex.org/C2779207338","wikidata":"https://www.wikidata.org/wiki/Q9240","display_name":"Indonesian","level":2,"score":0.8621525764465332},{"id":"https://openalex.org/C66322947","wikidata":"https://www.wikidata.org/wiki/Q11658","display_name":"Transformer","level":3,"score":0.6635022163391113},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6366944313049316},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.6205874681472778},{"id":"https://openalex.org/C18552078","wikidata":"https://www.wikidata.org/wiki/Q255615","display_name":"Code-switching","level":2,"score":0.5237786173820496},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.48594796657562256},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.4638400673866272},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.10153523087501526},{"id":"https://openalex.org/C119599485","wikidata":"https://www.wikidata.org/wiki/Q43035","display_name":"Electrical engineering","level":1,"score":0.09913462400436401},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.05721369385719299},{"id":"https://openalex.org/C165801399","wikidata":"https://www.wikidata.org/wiki/Q25428","display_name":"Voltage","level":2,"score":0.050423890352249146}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1007/s13278-025-01444-9","is_oa":true,"landing_page_url":"https://doi.org/10.1007/s13278-025-01444-9","pdf_url":"https://link.springer.com/content/pdf/10.1007/s13278-025-01444-9.pdf","source":{"id":"https://openalex.org/S2764891196","display_name":"Social Network Analysis and Mining","issn_l":"1869-5450","issn":["1869-5450","1869-5469"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319900","host_organization_name":"Springer Science+Business Media","host_organization_lineage":["https://openalex.org/P4310319900","https://openalex.org/P4310319965"],"host_organization_lineage_names":["Springer Science+Business Media","Springer Nature"],"type":"journal"},"license":"cc-by-nc-nd","license_id":"https://openalex.org/licenses/cc-by-nc-nd","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Social Network Analysis and Mining","raw_type":"journal-article"}],"best_oa_location":{"id":"doi:10.1007/s13278-025-01444-9","is_oa":true,"landing_page_url":"https://doi.org/10.1007/s13278-025-01444-9","pdf_url":"https://link.springer.com/content/pdf/10.1007/s13278-025-01444-9.pdf","source":{"id":"https://openalex.org/S2764891196","display_name":"Social Network Analysis and Mining","issn_l":"1869-5450","issn":["1869-5450","1869-5469"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319900","host_organization_name":"Springer Science+Business Media","host_organization_lineage":["https://openalex.org/P4310319900","https://openalex.org/P4310319965"],"host_organization_lineage_names":["Springer Science+Business Media","Springer Nature"],"type":"journal"},"license":"cc-by-nc-nd","license_id":"https://openalex.org/licenses/cc-by-nc-nd","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Social Network Analysis and Mining","raw_type":"journal-article"},"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/4","score":0.7599999904632568,"display_name":"Quality Education"}],"awards":[],"funders":[{"id":"https://openalex.org/F4320311019","display_name":"Universiti Brunei Darussalam","ror":"https://ror.org/02qnf3n86"}],"has_content":{"grobid_xml":false,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4408981324.pdf"},"referenced_works_count":34,"referenced_works":["https://openalex.org/W1981276685","https://openalex.org/W2256323257","https://openalex.org/W2962784628","https://openalex.org/W2983418531","https://openalex.org/W2986154550","https://openalex.org/W3034284720","https://openalex.org/W3035390927","https://openalex.org/W3096266342","https://openalex.org/W3098637735","https://openalex.org/W3099919888","https://openalex.org/W3103187652","https://openalex.org/W3104186312","https://openalex.org/W3115404050","https://openalex.org/W3116295307","https://openalex.org/W3156761824","https://openalex.org/W3173110011","https://openalex.org/W3181003422","https://openalex.org/W3199243620","https://openalex.org/W4200494043","https://openalex.org/W4206130444","https://openalex.org/W4226134572","https://openalex.org/W4246183800","https://openalex.org/W4285060468","https://openalex.org/W4316021280","https://openalex.org/W4377699472","https://openalex.org/W4381681023","https://openalex.org/W4389518325","https://openalex.org/W4389520255","https://openalex.org/W4392669812","https://openalex.org/W6600297362","https://openalex.org/W6600424091","https://openalex.org/W6601211009","https://openalex.org/W6702248584","https://openalex.org/W6826056734"],"related_works":["https://openalex.org/W4242621793","https://openalex.org/W648807974","https://openalex.org/W4229837155","https://openalex.org/W4236086937","https://openalex.org/W2294302573","https://openalex.org/W4312419881","https://openalex.org/W2086953810","https://openalex.org/W2512763533","https://openalex.org/W4248715694","https://openalex.org/W4281838804"],"abstract_inverted_index":{"Pre-trained":[0],"language":[1,19,39,98,112,209],"models":[2,113,164,169,181,210],"(PLMs)":[3],"have":[4],"become":[5],"increasingly":[6],"popular":[7],"due":[8],"to":[9,12,214],"their":[10],"ability":[11],"achieve":[13],"state-of-the-art":[14],"performance":[15,199],"on":[16],"various":[17,171],"natural":[18,97],"processing":[20],"tasks":[21,203],"with":[22,33],"less":[23],"training":[24,141,159],"data":[25],"and":[26,40,77,124,136,144,156,167,204,221],"time.":[27],"However,":[28],"they":[29],"struggle":[30],"when":[31],"dealing":[32],"code-mixed":[34,66,184,219],"data,":[35],"characterized":[36],"by":[37],"colloquial":[38],"inconsistent":[41],"linguistic":[42],"forms.":[43],"This":[44,186],"limitation":[45],"arises":[46],"because":[47],"most":[48],"available":[49],"PLMs":[50,62],"are":[51,91],"trained":[52],"monolingually":[53],"or":[54],"for":[55,65,71,116,182,191],"individual":[56],"languages.":[57],"Furthermore,":[58],"the":[59,81,94,176,189,197],"availability":[60],"of":[61,84,86,96,110,178,218],"specifically":[63,114],"designed":[64,115],"text":[67],"remains":[68],"limited,":[69],"especially":[70],"low-resource":[72],"languages":[73,90,118,220],"like":[74],"Indonesian":[75,120],"(ID)":[76],"Javanese":[78,122],"(JV).":[79],"Despite":[80],"significant":[82],"number":[83],"speakers":[85],"these":[87,102],"languages,":[88],"both":[89],"underrepresented":[92],"in":[93,170,200],"field":[95],"processing.":[99],"To":[100],"address":[101],"issues,":[103],"this":[104],"study":[105,187],"introduces":[106],"IndoJavE,":[107],"a":[108,215],"series":[109],"pre-trained":[111,180,208],"mixing":[117],"between":[119],"(ID),":[121],"(JV),":[123],"English":[125],"(EN)":[126],"texts.":[127,185],"We":[128],"developed":[129],"four":[130],"transformer-based":[131],"models,":[132],"IndoJavE-BERT,":[133],"IndoJavE-RoBERTa,":[134],"IndoJavE-IndoBERTweet,":[135],"IndoJavE-IndoBERT,":[137],"using":[138],"two":[139],"approaches:":[140],"from":[142,160],"scratch":[143],"transfer":[145,151],"learning.":[146],"Our":[147],"results":[148],"show":[149],"that":[150,211],"learning":[152],"is":[153],"more":[154,206],"efficient":[155],"effective":[157],"than":[158],"scratch.":[161],"The":[162],"IndoJavE":[163],"outperformed":[165],"multilingual":[166],"monolingual":[168],"downstream":[172],"NLP":[173,202],"tasks,":[174],"highlighting":[175],"importance":[177],"specialized":[179],"handling":[183],"paves":[188],"way":[190],"future":[192],"research":[193],"directions,":[194],"including":[195],"exploring":[196],"models\u2019":[198],"diverse":[201],"developing":[205],"versatile":[207],"can":[212],"adapt":[213],"broader":[216],"range":[217],"dialects.":[222]},"counts_by_year":[{"year":2026,"cited_by_count":2},{"year":2025,"cited_by_count":5}],"updated_date":"2026-04-07T14:57:38.498316","created_date":"2025-10-10T00:00:00"}
