{"id":"https://openalex.org/W4312264611","doi":"https://doi.org/10.1109/access.2022.3223703","title":"A Systematic Review on Language Identification of Code-Mixed Text: Techniques, Data Availability, Challenges, and Framework Development","display_name":"A Systematic Review on Language Identification of Code-Mixed Text: Techniques, Data Availability, Challenges, and Framework Development","publication_year":2022,"publication_date":"2022-01-01","ids":{"openalex":"https://openalex.org/W4312264611","doi":"https://doi.org/10.1109/access.2022.3223703"},"language":"en","primary_location":{"id":"doi:10.1109/access.2022.3223703","is_oa":true,"landing_page_url":"https://doi.org/10.1109/access.2022.3223703","pdf_url":"https://ieeexplore.ieee.org/ielx7/6287639/6514899/09956817.pdf","source":{"id":"https://openalex.org/S2485537415","display_name":"IEEE Access","issn_l":"2169-3536","issn":["2169-3536"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Access","raw_type":"journal-article"},"type":"review","indexed_in":["crossref","doaj"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://ieeexplore.ieee.org/ielx7/6287639/6514899/09956817.pdf","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5007014627","display_name":"Ahmad Fathan Hidayatullah","orcid":"https://orcid.org/0000-0002-3755-2648"},"institutions":[{"id":"https://openalex.org/I189462010","display_name":"Universiti Brunei Darussalam","ror":"https://ror.org/02qnf3n86","country_code":"BN","type":"education","lineage":["https://openalex.org/I189462010"]},{"id":"https://openalex.org/I35427347","display_name":"Islamic University of Indonesia","ror":"https://ror.org/000pmrk50","country_code":"ID","type":"education","lineage":["https://openalex.org/I35427347"]}],"countries":["BN","ID"],"is_corresponding":true,"raw_author_name":"Ahmad Fathan Hidayatullah","raw_affiliation_strings":["School of Digital Science, Universiti Brunei Darussalam, Gadong, Brunei Darussalam","Department of Informatics, Universitas Islam Indonesia, Jalan Kaliurang km 14.5, Yogyakarta, Indonesia","School of Digital Science, Universiti Brunei Darussalam, Jalan Tungku Link, Gadong, Brunei Darussalam"],"raw_orcid":"https://orcid.org/0000-0002-3755-2648","affiliations":[{"raw_affiliation_string":"School of Digital Science, Universiti Brunei Darussalam, Gadong, Brunei Darussalam","institution_ids":["https://openalex.org/I189462010"]},{"raw_affiliation_string":"Department of Informatics, Universitas Islam Indonesia, Jalan Kaliurang km 14.5, Yogyakarta, Indonesia","institution_ids":["https://openalex.org/I35427347"]},{"raw_affiliation_string":"School of Digital Science, Universiti Brunei Darussalam, Jalan Tungku Link, Gadong, Brunei Darussalam","institution_ids":["https://openalex.org/I189462010"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5056446470","display_name":"Atika Qazi","orcid":"https://orcid.org/0000-0002-3565-6355"},"institutions":[{"id":"https://openalex.org/I189462010","display_name":"Universiti Brunei Darussalam","ror":"https://ror.org/02qnf3n86","country_code":"BN","type":"education","lineage":["https://openalex.org/I189462010"]}],"countries":["BN"],"is_corresponding":false,"raw_author_name":"Atika Qazi","raw_affiliation_strings":["Centre for Lifelong Learning, Universiti Brunei Darussalam, Gadong BE, Brunei Darussalam","Centre for Lifelong Learning, Universiti Brunei Darussalam, Jalan Tungku Link, Gadong, Brunei Darussalam"],"raw_orcid":"https://orcid.org/0000-0002-3565-6355","affiliations":[{"raw_affiliation_string":"Centre for Lifelong Learning, Universiti Brunei Darussalam, Gadong BE, Brunei Darussalam","institution_ids":["https://openalex.org/I189462010"]},{"raw_affiliation_string":"Centre for Lifelong Learning, Universiti Brunei Darussalam, Jalan Tungku Link, Gadong, Brunei Darussalam","institution_ids":["https://openalex.org/I189462010"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5110190122","display_name":"Daphne Teck Ching Lai","orcid":null},"institutions":[{"id":"https://openalex.org/I189462010","display_name":"Universiti Brunei Darussalam","ror":"https://ror.org/02qnf3n86","country_code":"BN","type":"education","lineage":["https://openalex.org/I189462010"]}],"countries":["BN"],"is_corresponding":false,"raw_author_name":"Daphne Teck Ching Lai","raw_affiliation_strings":["School of Digital Science, Universiti Brunei Darussalam, Gadong, Brunei Darussalam","School of Digital Science, Universiti Brunei Darussalam, Jalan Tungku Link, Gadong, Brunei Darussalam"],"raw_orcid":"https://orcid.org/0000-0001-8290-8941","affiliations":[{"raw_affiliation_string":"School of Digital Science, Universiti Brunei Darussalam, Gadong, Brunei Darussalam","institution_ids":["https://openalex.org/I189462010"]},{"raw_affiliation_string":"School of Digital Science, Universiti Brunei Darussalam, Jalan Tungku Link, Gadong, Brunei Darussalam","institution_ids":["https://openalex.org/I189462010"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5039234629","display_name":"Rosyzie Anna Awg Haji Mohd Apong","orcid":null},"institutions":[{"id":"https://openalex.org/I189462010","display_name":"Universiti Brunei Darussalam","ror":"https://ror.org/02qnf3n86","country_code":"BN","type":"education","lineage":["https://openalex.org/I189462010"]}],"countries":["BN"],"is_corresponding":false,"raw_author_name":"Rosyzie Anna Apong","raw_affiliation_strings":["School of Digital Science, Universiti Brunei Darussalam, Gadong, Brunei Darussalam","School of Digital Science, Universiti Brunei Darussalam, Jalan Tungku Link, Gadong, Brunei Darussalam"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"School of Digital Science, Universiti Brunei Darussalam, Gadong, Brunei Darussalam","institution_ids":["https://openalex.org/I189462010"]},{"raw_affiliation_string":"School of Digital Science, Universiti Brunei Darussalam, Jalan Tungku Link, Gadong, Brunei Darussalam","institution_ids":["https://openalex.org/I189462010"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5007014627"],"corresponding_institution_ids":["https://openalex.org/I189462010","https://openalex.org/I35427347"],"apc_list":{"value":1850,"currency":"USD","value_usd":1850},"apc_paid":{"value":1850,"currency":"USD","value_usd":1850},"fwci":3.7496,"has_fulltext":false,"cited_by_count":31,"citation_normalized_percentile":{"value":0.94048237,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":94,"max":100},"biblio":{"volume":"10","issue":null,"first_page":"122812","last_page":"122831"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12262","display_name":"Hate Speech and Cyberbullying Detection","score":0.9945999979972839,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T12262","display_name":"Hate Speech and Cyberbullying Detection","score":0.9945999979972839,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12380","display_name":"Authorship Attribution and Profiling","score":0.984499990940094,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9818999767303467,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.809507429599762},{"id":"https://openalex.org/keywords/code-review","display_name":"Code review","score":0.6357203722000122},{"id":"https://openalex.org/keywords/ambiguity","display_name":"Ambiguity","score":0.547353982925415},{"id":"https://openalex.org/keywords/sentence","display_name":"Sentence","score":0.5165873765945435},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.5127997994422913},{"id":"https://openalex.org/keywords/identification","display_name":"Identification (biology)","score":0.5086170434951782},{"id":"https://openalex.org/keywords/code","display_name":"Code (set theory)","score":0.5033320784568787},{"id":"https://openalex.org/keywords/code-mixing","display_name":"Code-mixing","score":0.46606332063674927},{"id":"https://openalex.org/keywords/data-science","display_name":"Data science","score":0.40784531831741333},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.37318915128707886},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.3722867965698242},{"id":"https://openalex.org/keywords/code-switching","display_name":"Code-switching","score":0.3420671820640564},{"id":"https://openalex.org/keywords/software-quality","display_name":"Software quality","score":0.2594391703605652},{"id":"https://openalex.org/keywords/programming-language","display_name":"Programming language","score":0.18736153841018677},{"id":"https://openalex.org/keywords/linguistics","display_name":"Linguistics","score":0.11788630485534668},{"id":"https://openalex.org/keywords/software","display_name":"Software","score":0.1171492338180542},{"id":"https://openalex.org/keywords/software-development","display_name":"Software development","score":0.11633089184761047}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.809507429599762},{"id":"https://openalex.org/C150292731","wikidata":"https://www.wikidata.org/wiki/Q1342704","display_name":"Code review","level":5,"score":0.6357203722000122},{"id":"https://openalex.org/C2780522230","wikidata":"https://www.wikidata.org/wiki/Q1140419","display_name":"Ambiguity","level":2,"score":0.547353982925415},{"id":"https://openalex.org/C2777530160","wikidata":"https://www.wikidata.org/wiki/Q41796","display_name":"Sentence","level":2,"score":0.5165873765945435},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.5127997994422913},{"id":"https://openalex.org/C116834253","wikidata":"https://www.wikidata.org/wiki/Q2039217","display_name":"Identification (biology)","level":2,"score":0.5086170434951782},{"id":"https://openalex.org/C2776760102","wikidata":"https://www.wikidata.org/wiki/Q5139990","display_name":"Code (set theory)","level":3,"score":0.5033320784568787},{"id":"https://openalex.org/C2778598016","wikidata":"https://www.wikidata.org/wiki/Q3201279","display_name":"Code-mixing","level":3,"score":0.46606332063674927},{"id":"https://openalex.org/C2522767166","wikidata":"https://www.wikidata.org/wiki/Q2374463","display_name":"Data science","level":1,"score":0.40784531831741333},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.37318915128707886},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.3722867965698242},{"id":"https://openalex.org/C18552078","wikidata":"https://www.wikidata.org/wiki/Q255615","display_name":"Code-switching","level":2,"score":0.3420671820640564},{"id":"https://openalex.org/C117447612","wikidata":"https://www.wikidata.org/wiki/Q1412670","display_name":"Software quality","level":4,"score":0.2594391703605652},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.18736153841018677},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.11788630485534668},{"id":"https://openalex.org/C2777904410","wikidata":"https://www.wikidata.org/wiki/Q7397","display_name":"Software","level":2,"score":0.1171492338180542},{"id":"https://openalex.org/C529173508","wikidata":"https://www.wikidata.org/wiki/Q638608","display_name":"Software development","level":3,"score":0.11633089184761047},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.0},{"id":"https://openalex.org/C59822182","wikidata":"https://www.wikidata.org/wiki/Q441","display_name":"Botany","level":1,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1109/access.2022.3223703","is_oa":true,"landing_page_url":"https://doi.org/10.1109/access.2022.3223703","pdf_url":"https://ieeexplore.ieee.org/ielx7/6287639/6514899/09956817.pdf","source":{"id":"https://openalex.org/S2485537415","display_name":"IEEE Access","issn_l":"2169-3536","issn":["2169-3536"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Access","raw_type":"journal-article"},{"id":"pmh:oai:doaj.org/article:cf2349882c4a452ea8d5c7a1a267bede","is_oa":true,"landing_page_url":"https://doaj.org/article/cf2349882c4a452ea8d5c7a1a267bede","pdf_url":null,"source":{"id":"https://openalex.org/S4306401280","display_name":"DOAJ (DOAJ: Directory of Open Access Journals)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by-sa","license_id":"https://openalex.org/licenses/cc-by-sa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"IEEE Access, Vol 10, Pp 122812-122831 (2022)","raw_type":"article"}],"best_oa_location":{"id":"doi:10.1109/access.2022.3223703","is_oa":true,"landing_page_url":"https://doi.org/10.1109/access.2022.3223703","pdf_url":"https://ieeexplore.ieee.org/ielx7/6287639/6514899/09956817.pdf","source":{"id":"https://openalex.org/S2485537415","display_name":"IEEE Access","issn_l":"2169-3536","issn":["2169-3536"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Access","raw_type":"journal-article"},"sustainable_development_goals":[{"display_name":"Quality Education","score":0.7699999809265137,"id":"https://metadata.un.org/sdg/4"}],"awards":[],"funders":[{"id":"https://openalex.org/F4320311019","display_name":"Universiti Brunei Darussalam","ror":"https://ror.org/02qnf3n86"}],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4312264611.pdf","grobid_xml":"https://content.openalex.org/works/W4312264611.grobid-xml"},"referenced_works_count":89,"referenced_works":["https://openalex.org/W2025003514","https://openalex.org/W2063027075","https://openalex.org/W2118328848","https://openalex.org/W2146277089","https://openalex.org/W2199960639","https://openalex.org/W2251149908","https://openalex.org/W2293363789","https://openalex.org/W2395626303","https://openalex.org/W2516191937","https://openalex.org/W2520003157","https://openalex.org/W2557418006","https://openalex.org/W2560987006","https://openalex.org/W2562905031","https://openalex.org/W2564394148","https://openalex.org/W2600617046","https://openalex.org/W2606675507","https://openalex.org/W2740885753","https://openalex.org/W2771452259","https://openalex.org/W2781543409","https://openalex.org/W2794564954","https://openalex.org/W2797433625","https://openalex.org/W2802473559","https://openalex.org/W2806265406","https://openalex.org/W2807800730","https://openalex.org/W2885940157","https://openalex.org/W2886100554","https://openalex.org/W2889324292","https://openalex.org/W2897600321","https://openalex.org/W2903145805","https://openalex.org/W2903429053","https://openalex.org/W2914019777","https://openalex.org/W2914444953","https://openalex.org/W2945102769","https://openalex.org/W2949713431","https://openalex.org/W2962937786","https://openalex.org/W2963171563","https://openalex.org/W2963676641","https://openalex.org/W2969746599","https://openalex.org/W2979826702","https://openalex.org/W2983418531","https://openalex.org/W2999346775","https://openalex.org/W3003906224","https://openalex.org/W3004521794","https://openalex.org/W3004835614","https://openalex.org/W3006926345","https://openalex.org/W3007986991","https://openalex.org/W3012414323","https://openalex.org/W3015158309","https://openalex.org/W3019581400","https://openalex.org/W3022086096","https://openalex.org/W3033048494","https://openalex.org/W3034196716","https://openalex.org/W3088665616","https://openalex.org/W3108387573","https://openalex.org/W3111166797","https://openalex.org/W3125671830","https://openalex.org/W3134124363","https://openalex.org/W3138317498","https://openalex.org/W3139112046","https://openalex.org/W3144543375","https://openalex.org/W3153801370","https://openalex.org/W3154741768","https://openalex.org/W3156761824","https://openalex.org/W3160009589","https://openalex.org/W3166943438","https://openalex.org/W3171205933","https://openalex.org/W3171500670","https://openalex.org/W3174959106","https://openalex.org/W3177101289","https://openalex.org/W3187166746","https://openalex.org/W3208815918","https://openalex.org/W3212241566","https://openalex.org/W3214113120","https://openalex.org/W3215893865","https://openalex.org/W4213055130","https://openalex.org/W4286419789","https://openalex.org/W4287019905","https://openalex.org/W4385245566","https://openalex.org/W6711863121","https://openalex.org/W6735438181","https://openalex.org/W6739901393","https://openalex.org/W6755423240","https://openalex.org/W6773106327","https://openalex.org/W6783518176","https://openalex.org/W6786374231","https://openalex.org/W6787815814","https://openalex.org/W6800898766","https://openalex.org/W6804073783","https://openalex.org/W6903861061"],"related_works":["https://openalex.org/W4205878333","https://openalex.org/W2755033583","https://openalex.org/W2508660901","https://openalex.org/W1485736417","https://openalex.org/W4378378905","https://openalex.org/W3166060075","https://openalex.org/W4389308877","https://openalex.org/W153744606","https://openalex.org/W3181795562","https://openalex.org/W2938595889"],"abstract_inverted_index":{"The":[0,140],"mix":[1],"of":[2,73,90,145,150,157,160],"native":[3],"language":[4,18],"with":[5,39],"other":[6],"languages":[7],"(code-mixing)":[8],"in":[9,64,83,153],"social":[10],"media":[11],"has":[12,23],"posed":[13],"a":[14,45,88,174],"severe":[15],"challenge":[16],"for":[17,48,80,93,128,177],"identification":[19],"(LID)":[20],"systems.":[21],"It":[22],"encouraged":[24],"research":[25,55,82],"on":[26,70],"code-mixed":[27,49,66,84,94,103,125,151],"LID":[28,67,95,104],"solutions.":[29],"This":[30,51,119],"study":[31,52],"investigated":[32],"the":[33,98,136,143,154,170],"techniques,":[34],"challenges,":[35],"and":[36,43,60,96,116,163,172],"dataset":[37],"availability":[38],"corresponding":[40],"quality":[41,137],"criteria":[42,138],"developed":[44],"comprehensive":[46],"framework":[47,176],"LID.":[50,85,129],"addressed":[53],"four":[54,108],"issues":[56],"to":[57,134],"identify":[58],"gaps":[59],"future":[61,81],"work":[62],"opportunities":[63],"tackling":[65],"challenges.":[68],"Based":[69],"our":[71,181],"analysis":[72],"reviewed":[74],"studies,":[75],"we":[76,106,168],"outlined":[77],"key":[78],"points":[79],"We":[86,130],"demonstrated":[87],"taxonomy":[89],"applied":[91],"techniques":[92],"highlighted":[97],"different":[99],"technique":[100],"variants.":[101],"In":[102],"tasks,":[105],"discovered":[107],"significant":[109],"challenges:":[110],"ambiguity,":[111],"lexical":[112],"borrowing,":[113],"non-standard":[114],"words,":[115],"intra-word":[117],"code-mixing.":[118],"systematic":[120],"literature":[121,182],"review":[122],"recognised":[123],"32":[124],"datasets":[126],"available":[127],"proposed":[131,173],"five":[132],"features":[133,141],"describe":[135],"dataset.":[139],"are":[142],"number":[144,156,159],"instances":[146],"or":[147],"sentences,":[148],"percentage":[149],"types":[152],"data,":[155],"tokens,":[158,162],"unique":[161],"average":[164],"sentence":[165],"length.":[166],"Finally,":[167],"synthesised":[169],"methodologies":[171],"conceptual":[175],"subsequent":[178],"studies":[179],"through":[180],"analysis.":[183]},"counts_by_year":[{"year":2026,"cited_by_count":4},{"year":2025,"cited_by_count":13},{"year":2024,"cited_by_count":10},{"year":2023,"cited_by_count":2},{"year":2022,"cited_by_count":2}],"updated_date":"2026-05-06T08:25:59.206177","created_date":"2025-10-10T00:00:00"}
