{"id":"https://openalex.org/W4387970438","doi":"https://doi.org/10.1109/iicaiet59451.2023.10292108","title":"Data Augmentation Approach for Language Identification in Imbalanced Bilingual Code-Mixed Social Media Datasets","display_name":"Data Augmentation Approach for Language Identification in Imbalanced Bilingual Code-Mixed Social Media Datasets","publication_year":2023,"publication_date":"2023-09-12","ids":{"openalex":"https://openalex.org/W4387970438","doi":"https://doi.org/10.1109/iicaiet59451.2023.10292108"},"language":"en","primary_location":{"id":"doi:10.1109/iicaiet59451.2023.10292108","is_oa":false,"landing_page_url":"http://dx.doi.org/10.1109/iicaiet59451.2023.10292108","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2023 IEEE International Conference on Artificial Intelligence in Engineering and Technology (IICAIET)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5081111167","display_name":"Mohd Suhairi Md Suhaimin","orcid":"https://orcid.org/0000-0002-2275-4677"},"institutions":[{"id":"https://openalex.org/I1308321612","display_name":"Ministry of Higher Education","ror":"https://ror.org/05mcs2t73","country_code":"MY","type":"government","lineage":["https://openalex.org/I1308321612"]},{"id":"https://openalex.org/I161371597","display_name":"Universiti of Malaysia Sabah","ror":"https://ror.org/040v70252","country_code":"MY","type":"education","lineage":["https://openalex.org/I161371597"]}],"countries":["MY"],"is_corresponding":true,"raw_author_name":"Mohd Suhairi Md Suhaimin","raw_affiliation_strings":["Universiti Malaysia Sabah,Data Technology and Applications Research Group, Faculty of Computing and Informatics,Sabah,Malaysia","Polytechnic and Community College Education Department, Ministry of Higher Education Malaysia, Putrajaya, Malaysia","Data Technology and Applications Research Group, Faculty of Computing and Informatics, Universiti Malaysia Sabah, Sabah, Malaysia"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Universiti Malaysia Sabah,Data Technology and Applications Research Group, Faculty of Computing and Informatics,Sabah,Malaysia","institution_ids":["https://openalex.org/I161371597"]},{"raw_affiliation_string":"Polytechnic and Community College Education Department, Ministry of Higher Education Malaysia, Putrajaya, Malaysia","institution_ids":["https://openalex.org/I1308321612"]},{"raw_affiliation_string":"Data Technology and Applications Research Group, Faculty of Computing and Informatics, Universiti Malaysia Sabah, Sabah, Malaysia","institution_ids":["https://openalex.org/I161371597"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5042525716","display_name":"Mohd Hanafi Ahmad Hijazi","orcid":"https://orcid.org/0000-0003-0431-8967"},"institutions":[{"id":"https://openalex.org/I161371597","display_name":"Universiti of Malaysia Sabah","ror":"https://ror.org/040v70252","country_code":"MY","type":"education","lineage":["https://openalex.org/I161371597"]}],"countries":["MY"],"is_corresponding":false,"raw_author_name":"Mohd Hanafi Ahmad Hijazi","raw_affiliation_strings":["Universiti Malaysia Sabah,Data Technology and Applications Research Group, Faculty of Computing and Informatics,Sabah,Malaysia","Data Technology and Applications Research Group, Faculty of Computing and Informatics, Universiti Malaysia Sabah, Sabah, Malaysia","Faculty of Computing and Informatics, Creative Advanced Machine Intelligence Research Centre, Universiti Malaysia Sabah, Sabah, Malaysia"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Universiti Malaysia Sabah,Data Technology and Applications Research Group, Faculty of Computing and Informatics,Sabah,Malaysia","institution_ids":["https://openalex.org/I161371597"]},{"raw_affiliation_string":"Data Technology and Applications Research Group, Faculty of Computing and Informatics, Universiti Malaysia Sabah, Sabah, Malaysia","institution_ids":["https://openalex.org/I161371597"]},{"raw_affiliation_string":"Faculty of Computing and Informatics, Creative Advanced Machine Intelligence Research Centre, Universiti Malaysia Sabah, Sabah, Malaysia","institution_ids":["https://openalex.org/I161371597"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5095927421","display_name":"Ervin Gubin Moung","orcid":"https://orcid.org/0009-0004-5582-0986"},"institutions":[{"id":"https://openalex.org/I161371597","display_name":"Universiti of Malaysia Sabah","ror":"https://ror.org/040v70252","country_code":"MY","type":"education","lineage":["https://openalex.org/I161371597"]}],"countries":["MY"],"is_corresponding":false,"raw_author_name":"Ervin Gubin Moung","raw_affiliation_strings":["Universiti Malaysia Sabah,Data Technology and Applications Research Group, Faculty of Computing and Informatics,Sabah,Malaysia","Data Technology and Applications Research Group, Faculty of Computing and Informatics, Universiti Malaysia Sabah, Sabah, Malaysia"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Universiti Malaysia Sabah,Data Technology and Applications Research Group, Faculty of Computing and Informatics,Sabah,Malaysia","institution_ids":["https://openalex.org/I161371597"]},{"raw_affiliation_string":"Data Technology and Applications Research Group, Faculty of Computing and Informatics, Universiti Malaysia Sabah, Sabah, Malaysia","institution_ids":["https://openalex.org/I161371597"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5065584400","display_name":"Mohd Azwan Mohamad Hamza","orcid":null},"institutions":[{"id":"https://openalex.org/I102913810","display_name":"Universiti Malaysia Pahang Al-Sultan Abdullah","ror":"https://ror.org/01704wp68","country_code":"MY","type":"education","lineage":["https://openalex.org/I102913810"]}],"countries":["MY"],"is_corresponding":false,"raw_author_name":"Mohd Azwan Mohamad Hamza","raw_affiliation_strings":["Knowledge Engineering &#x0026; Computational Linguistic (KECL) Research Group Universiti Malaysia Pahang,Pahang,Malaysia"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Knowledge Engineering &#x0026; Computational Linguistic (KECL) Research Group Universiti Malaysia Pahang,Pahang,Malaysia","institution_ids":["https://openalex.org/I102913810"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5081111167"],"corresponding_institution_ids":["https://openalex.org/I1308321612","https://openalex.org/I161371597"],"apc_list":null,"apc_paid":null,"fwci":0.3393,"has_fulltext":false,"cited_by_count":2,"citation_normalized_percentile":{"value":0.66530871,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":95},"biblio":{"volume":null,"issue":null,"first_page":"257","last_page":"261"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9987000226974487,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9987000226974487,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12380","display_name":"Authorship Attribution and Profiling","score":0.9965999722480774,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.989799976348877,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8465118408203125},{"id":"https://openalex.org/keywords/identification","display_name":"Identification (biology)","score":0.7700368165969849},{"id":"https://openalex.org/keywords/language-identification","display_name":"Language identification","score":0.7205876111984253},{"id":"https://openalex.org/keywords/code","display_name":"Code (set theory)","score":0.6070793867111206},{"id":"https://openalex.org/keywords/social-media","display_name":"Social media","score":0.5702067613601685},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.5321589708328247},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5113691687583923},{"id":"https://openalex.org/keywords/machine-translation","display_name":"Machine translation","score":0.48821595311164856},{"id":"https://openalex.org/keywords/code-switching","display_name":"Code-switching","score":0.4634297788143158},{"id":"https://openalex.org/keywords/similarity","display_name":"Similarity (geometry)","score":0.44039687514305115},{"id":"https://openalex.org/keywords/sentence","display_name":"Sentence","score":0.43489134311676025},{"id":"https://openalex.org/keywords/language-model","display_name":"Language model","score":0.4344373941421509},{"id":"https://openalex.org/keywords/scarcity","display_name":"Scarcity","score":0.43036895990371704},{"id":"https://openalex.org/keywords/data-science","display_name":"Data science","score":0.3724541664123535},{"id":"https://openalex.org/keywords/natural-language","display_name":"Natural language","score":0.18505483865737915},{"id":"https://openalex.org/keywords/programming-language","display_name":"Programming language","score":0.1596725583076477},{"id":"https://openalex.org/keywords/world-wide-web","display_name":"World Wide Web","score":0.14460617303848267},{"id":"https://openalex.org/keywords/linguistics","display_name":"Linguistics","score":0.10889643430709839}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8465118408203125},{"id":"https://openalex.org/C116834253","wikidata":"https://www.wikidata.org/wiki/Q2039217","display_name":"Identification (biology)","level":2,"score":0.7700368165969849},{"id":"https://openalex.org/C129792486","wikidata":"https://www.wikidata.org/wiki/Q1050419","display_name":"Language identification","level":3,"score":0.7205876111984253},{"id":"https://openalex.org/C2776760102","wikidata":"https://www.wikidata.org/wiki/Q5139990","display_name":"Code (set theory)","level":3,"score":0.6070793867111206},{"id":"https://openalex.org/C518677369","wikidata":"https://www.wikidata.org/wiki/Q202833","display_name":"Social media","level":2,"score":0.5702067613601685},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.5321589708328247},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5113691687583923},{"id":"https://openalex.org/C203005215","wikidata":"https://www.wikidata.org/wiki/Q79798","display_name":"Machine translation","level":2,"score":0.48821595311164856},{"id":"https://openalex.org/C18552078","wikidata":"https://www.wikidata.org/wiki/Q255615","display_name":"Code-switching","level":2,"score":0.4634297788143158},{"id":"https://openalex.org/C103278499","wikidata":"https://www.wikidata.org/wiki/Q254465","display_name":"Similarity (geometry)","level":3,"score":0.44039687514305115},{"id":"https://openalex.org/C2777530160","wikidata":"https://www.wikidata.org/wiki/Q41796","display_name":"Sentence","level":2,"score":0.43489134311676025},{"id":"https://openalex.org/C137293760","wikidata":"https://www.wikidata.org/wiki/Q3621696","display_name":"Language model","level":2,"score":0.4344373941421509},{"id":"https://openalex.org/C109747225","wikidata":"https://www.wikidata.org/wiki/Q815758","display_name":"Scarcity","level":2,"score":0.43036895990371704},{"id":"https://openalex.org/C2522767166","wikidata":"https://www.wikidata.org/wiki/Q2374463","display_name":"Data science","level":1,"score":0.3724541664123535},{"id":"https://openalex.org/C195324797","wikidata":"https://www.wikidata.org/wiki/Q33742","display_name":"Natural language","level":2,"score":0.18505483865737915},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.1596725583076477},{"id":"https://openalex.org/C136764020","wikidata":"https://www.wikidata.org/wiki/Q466","display_name":"World Wide Web","level":1,"score":0.14460617303848267},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.10889643430709839},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0},{"id":"https://openalex.org/C59822182","wikidata":"https://www.wikidata.org/wiki/Q441","display_name":"Botany","level":1,"score":0.0},{"id":"https://openalex.org/C175444787","wikidata":"https://www.wikidata.org/wiki/Q39072","display_name":"Microeconomics","level":1,"score":0.0},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.0},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/iicaiet59451.2023.10292108","is_oa":false,"landing_page_url":"http://dx.doi.org/10.1109/iicaiet59451.2023.10292108","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2023 IEEE International Conference on Artificial Intelligence in Engineering and Technology (IICAIET)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"score":0.7599999904632568,"id":"https://metadata.un.org/sdg/4","display_name":"Quality Education"}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":18,"referenced_works":["https://openalex.org/W2061272101","https://openalex.org/W2798348125","https://openalex.org/W2896457183","https://openalex.org/W2972702443","https://openalex.org/W2972954451","https://openalex.org/W2989143494","https://openalex.org/W3012414323","https://openalex.org/W3034724424","https://openalex.org/W3035390927","https://openalex.org/W3153869297","https://openalex.org/W3201435873","https://openalex.org/W4211107845","https://openalex.org/W4283766873","https://openalex.org/W4381681023","https://openalex.org/W4388122583","https://openalex.org/W6755207826","https://openalex.org/W6756296142","https://openalex.org/W6769263558"],"related_works":["https://openalex.org/W2771594921","https://openalex.org/W1998541766","https://openalex.org/W2064542902","https://openalex.org/W2262900283","https://openalex.org/W1985349217","https://openalex.org/W2098508228","https://openalex.org/W2624072012","https://openalex.org/W1508853749","https://openalex.org/W69308499","https://openalex.org/W1485788187"],"abstract_inverted_index":{"Addressing":[0],"the":[1,28,85,88,99,107,110,126],"problem":[2],"of":[3,34,87,109,121,128],"language":[4,52,95,114,129],"identification":[5,53,96,115,130],"in":[6,19,54,112,131],"code-mixed":[7,56,132],"datasets":[8],"poses":[9],"notable":[10],"challenges":[11,23,127],"due":[12],"to":[13,40,50,79],"data":[14],"scarcity":[15],"and":[16,31,66,101],"high":[17],"confusability":[18],"bilingual":[20,55],"contexts.":[21],"These":[22],"are":[24],"further":[25],"amplified":[26],"by":[27],"associated":[29],"imbalance":[30],"noise":[32],"characteristic":[33],"social":[35,57],"media":[36,58],"data,":[37],"complicating":[38],"efforts":[39],"optimize":[41],"performance.":[42],"This":[43],"paper":[44],"introduces":[45],"an":[46],"augmentation":[47],"approach":[48,74,111],"designed":[49],"enhance":[51],"data.":[59,133],"By":[60],"incorporating":[61],"reverse":[62],"translation,":[63],"semantic":[64],"similarity,":[65],"sampling":[67],"techniques":[68,123],"alongside":[69],"customized":[70],"reprocessing":[71],"strategies,":[72],"our":[73],"offers":[75],"a":[76,118],"comprehensive":[77],"solution":[78],"these":[80],"complex":[81],"issues.":[82],"To":[83],"evaluate":[84],"effectiveness":[86],"proposed":[89],"approach,":[90],"experiments":[91],"were":[92],"conducted":[93],"on":[94],"at":[97],"both":[98],"sentence":[100],"word":[102],"levels.":[103],"The":[104],"results":[105],"demonstrated":[106],"potential":[108],"optimizing":[113],"performance,":[116],"offering":[117],"compelling":[119],"combination":[120],"generation":[122],"for":[124],"addressing":[125]},"counts_by_year":[{"year":2025,"cited_by_count":1},{"year":2024,"cited_by_count":1}],"updated_date":"2026-05-01T08:36:08.643496","created_date":"2025-10-10T00:00:00"}
