{"id":"https://openalex.org/W2250548009","doi":"https://doi.org/10.3115/v1/w14-3902","title":"Code Mixing: A Challenge for Language Identification in the Language of Social Media","display_name":"Code Mixing: A Challenge for Language Identification in the Language of Social Media","publication_year":2014,"publication_date":"2014-01-01","ids":{"openalex":"https://openalex.org/W2250548009","doi":"https://doi.org/10.3115/v1/w14-3902","mag":"2250548009"},"language":"en","primary_location":{"id":"doi:10.3115/v1/w14-3902","is_oa":true,"landing_page_url":"https://doi.org/10.3115/v1/w14-3902","pdf_url":"https://aclanthology.org/W14-3902.pdf","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the First Workshop on Computational Approaches to Code Switching","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://aclanthology.org/W14-3902.pdf","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5007481805","display_name":"Utsab Barman","orcid":null},"institutions":[{"id":"https://openalex.org/I42934936","display_name":"Dublin City University","ror":"https://ror.org/04a1a1e81","country_code":"IE","type":"education","lineage":["https://openalex.org/I42934936"]}],"countries":["IE"],"is_corresponding":true,"raw_author_name":"Utsab Barman","raw_affiliation_strings":["CNGL Centre for Global Intelligent Content, National Centre for Language Technology","School of Computing, Dublin City University, Dublin, Ireland"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"CNGL Centre for Global Intelligent Content, National Centre for Language Technology","institution_ids":[]},{"raw_affiliation_string":"School of Computing, Dublin City University, Dublin, Ireland","institution_ids":["https://openalex.org/I42934936"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101679307","display_name":"Amitava Das","orcid":"https://orcid.org/0000-0002-3818-8227"},"institutions":[{"id":"https://openalex.org/I123534392","display_name":"University of North Texas","ror":"https://ror.org/00v97ad02","country_code":"US","type":"education","lineage":["https://openalex.org/I123534392"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Amitava Das","raw_affiliation_strings":["Department of Computer Science and Engineering University of North Texas, Denton, Texas, USA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Department of Computer Science and Engineering University of North Texas, Denton, Texas, USA","institution_ids":["https://openalex.org/I123534392"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5026305513","display_name":"Joachim Wagner","orcid":"https://orcid.org/0000-0002-8290-3849"},"institutions":[{"id":"https://openalex.org/I42934936","display_name":"Dublin City University","ror":"https://ror.org/04a1a1e81","country_code":"IE","type":"education","lineage":["https://openalex.org/I42934936"]}],"countries":["IE"],"is_corresponding":false,"raw_author_name":"Joachim Wagner","raw_affiliation_strings":["CNGL Centre for Global Intelligent Content, National Centre for Language Technology","School of Computing, Dublin City University, Dublin, Ireland"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"CNGL Centre for Global Intelligent Content, National Centre for Language Technology","institution_ids":[]},{"raw_affiliation_string":"School of Computing, Dublin City University, Dublin, Ireland","institution_ids":["https://openalex.org/I42934936"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5066497689","display_name":"Jennifer Foster","orcid":"https://orcid.org/0000-0002-7789-4853"},"institutions":[{"id":"https://openalex.org/I42934936","display_name":"Dublin City University","ror":"https://ror.org/04a1a1e81","country_code":"IE","type":"education","lineage":["https://openalex.org/I42934936"]}],"countries":["IE"],"is_corresponding":false,"raw_author_name":"Jennifer Foster","raw_affiliation_strings":["CNGL Centre for Global Intelligent Content, National Centre for Language Technology","School of Computing, Dublin City University, Dublin, Ireland"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"CNGL Centre for Global Intelligent Content, National Centre for Language Technology","institution_ids":[]},{"raw_affiliation_string":"School of Computing, Dublin City University, Dublin, Ireland","institution_ids":["https://openalex.org/I42934936"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5007481805"],"corresponding_institution_ids":["https://openalex.org/I42934936"],"apc_list":null,"apc_paid":null,"fwci":24.5219,"has_fulltext":true,"cited_by_count":269,"citation_normalized_percentile":{"value":0.99570848,"is_in_top_1_percent":true,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":90,"max":100},"biblio":{"volume":null,"issue":null,"first_page":"13","last_page":"23"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12380","display_name":"Authorship Attribution and Profiling","score":0.9994000196456909,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T12380","display_name":"Authorship Attribution and Profiling","score":0.9994000196456909,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.996999979019165,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12262","display_name":"Hate Speech and Cyberbullying Detection","score":0.9945999979972839,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7442846298217773},{"id":"https://openalex.org/keywords/identification","display_name":"Identification (biology)","score":0.61906498670578},{"id":"https://openalex.org/keywords/code-mixing","display_name":"Code-mixing","score":0.5970786809921265},{"id":"https://openalex.org/keywords/language-identification","display_name":"Language identification","score":0.5322202444076538},{"id":"https://openalex.org/keywords/code","display_name":"Code (set theory)","score":0.49598345160484314},{"id":"https://openalex.org/keywords/social-media","display_name":"Social media","score":0.47080671787261963},{"id":"https://openalex.org/keywords/mixing","display_name":"Mixing (physics)","score":0.4337542653083801},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.4208954870700836},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.33328449726104736},{"id":"https://openalex.org/keywords/linguistics","display_name":"Linguistics","score":0.3222980797290802},{"id":"https://openalex.org/keywords/programming-language","display_name":"Programming language","score":0.30539846420288086},{"id":"https://openalex.org/keywords/world-wide-web","display_name":"World Wide Web","score":0.2556339204311371},{"id":"https://openalex.org/keywords/code-switching","display_name":"Code-switching","score":0.22108864784240723},{"id":"https://openalex.org/keywords/natural-language","display_name":"Natural language","score":0.2057831883430481},{"id":"https://openalex.org/keywords/physics","display_name":"Physics","score":0.09299102425575256},{"id":"https://openalex.org/keywords/set","display_name":"Set (abstract data type)","score":0.07572361826896667}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7442846298217773},{"id":"https://openalex.org/C116834253","wikidata":"https://www.wikidata.org/wiki/Q2039217","display_name":"Identification (biology)","level":2,"score":0.61906498670578},{"id":"https://openalex.org/C2778598016","wikidata":"https://www.wikidata.org/wiki/Q3201279","display_name":"Code-mixing","level":3,"score":0.5970786809921265},{"id":"https://openalex.org/C129792486","wikidata":"https://www.wikidata.org/wiki/Q1050419","display_name":"Language identification","level":3,"score":0.5322202444076538},{"id":"https://openalex.org/C2776760102","wikidata":"https://www.wikidata.org/wiki/Q5139990","display_name":"Code (set theory)","level":3,"score":0.49598345160484314},{"id":"https://openalex.org/C518677369","wikidata":"https://www.wikidata.org/wiki/Q202833","display_name":"Social media","level":2,"score":0.47080671787261963},{"id":"https://openalex.org/C138777275","wikidata":"https://www.wikidata.org/wiki/Q6884054","display_name":"Mixing (physics)","level":2,"score":0.4337542653083801},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.4208954870700836},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.33328449726104736},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.3222980797290802},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.30539846420288086},{"id":"https://openalex.org/C136764020","wikidata":"https://www.wikidata.org/wiki/Q466","display_name":"World Wide Web","level":1,"score":0.2556339204311371},{"id":"https://openalex.org/C18552078","wikidata":"https://www.wikidata.org/wiki/Q255615","display_name":"Code-switching","level":2,"score":0.22108864784240723},{"id":"https://openalex.org/C195324797","wikidata":"https://www.wikidata.org/wiki/Q33742","display_name":"Natural language","level":2,"score":0.2057831883430481},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.09299102425575256},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.07572361826896667},{"id":"https://openalex.org/C59822182","wikidata":"https://www.wikidata.org/wiki/Q441","display_name":"Botany","level":1,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0}],"mesh":[],"locations_count":4,"locations":[{"id":"doi:10.3115/v1/w14-3902","is_oa":true,"landing_page_url":"https://doi.org/10.3115/v1/w14-3902","pdf_url":"https://aclanthology.org/W14-3902.pdf","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the First Workshop on Computational Approaches to Code Switching","raw_type":"proceedings-article"},{"id":"pmh:oai:doras.dcu.ie:25186","is_oa":true,"landing_page_url":"http://doras.dcu.ie/25186/","pdf_url":null,"source":{"id":"https://openalex.org/S4306401511","display_name":"Dublin City University Open Access Institutional Repository (Dublin City University)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I42934936","host_organization_name":"Dublin City University","host_organization_lineage":["https://openalex.org/I42934936"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"acceptedVersion","is_accepted":true,"is_published":false,"raw_source_name":"  Barman, Utsab, Das, Amitava ORCID: 0000-0003-3418-463X &lt;https://orcid.org/0000-0003-3418-463X&gt;, Wagner, Joachim ORCID: 0000-0002-8290-3849 &lt;https://orcid.org/0000-0002-8290-3849&gt; and Foster, Jennifer ORCID: 0000-0002-7789-4853 &lt;https://orcid.org/0000-0002-7789-4853&gt;  (2014) Code mixing: a challenge for language identification in the language of social media.  In: First Workshop on Computational Approaches to Code Switching, 25 Oct 2014, Doha, Qatar.      ","raw_type":"Conference or Workshop Item"},{"id":"pmh:oai:CiteSeerX.psu:10.1.1.672.6956","is_oa":false,"landing_page_url":"http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.672.6956","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"http://www.aclweb.org/anthology/W/W14/W14-3902.pdf","raw_type":"text"},{"id":"pmh:oai:CiteSeerX.psu:10.1.1.693.9688","is_oa":false,"landing_page_url":"http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.693.9688","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"http://www.computing.dcu.ie/%7Ejwagner/doc/Barman_challenge14.pdf","raw_type":"text"}],"best_oa_location":{"id":"doi:10.3115/v1/w14-3902","is_oa":true,"landing_page_url":"https://doi.org/10.3115/v1/w14-3902","pdf_url":"https://aclanthology.org/W14-3902.pdf","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the First Workshop on Computational Approaches to Code Switching","raw_type":"proceedings-article"},"sustainable_development_goals":[{"display_name":"Quality Education","id":"https://metadata.un.org/sdg/4","score":0.75}],"awards":[{"id":"https://openalex.org/G6561435997","display_name":null,"funder_award_id":"Grant 12/CE/I2267","funder_id":"https://openalex.org/F4320320847","funder_display_name":"Science Foundation Ireland"},{"id":"https://openalex.org/G8758897139","display_name":null,"funder_award_id":"12/CE/I2267","funder_id":"https://openalex.org/F4320320847","funder_display_name":"Science Foundation Ireland"}],"funders":[{"id":"https://openalex.org/F4320311382","display_name":"Jadavpur University","ror":"https://ror.org/02af4h012"},{"id":"https://openalex.org/F4320320847","display_name":"Science Foundation Ireland","ror":"https://ror.org/0271asj38"},{"id":"https://openalex.org/F4320320865","display_name":"Dublin City University","ror":"https://ror.org/04a1a1e81"}],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W2250548009.pdf","grobid_xml":"https://content.openalex.org/works/W2250548009.grobid-xml"},"referenced_works_count":43,"referenced_works":["https://openalex.org/W74783503","https://openalex.org/W157541337","https://openalex.org/W164793848","https://openalex.org/W195496810","https://openalex.org/W308745112","https://openalex.org/W371426616","https://openalex.org/W594384242","https://openalex.org/W1509958212","https://openalex.org/W1516289245","https://openalex.org/W1517579943","https://openalex.org/W1533946607","https://openalex.org/W1951381097","https://openalex.org/W1969005071","https://openalex.org/W1972711991","https://openalex.org/W2002019621","https://openalex.org/W2013489815","https://openalex.org/W2106403442","https://openalex.org/W2109943925","https://openalex.org/W2118585731","https://openalex.org/W2122052811","https://openalex.org/W2123660869","https://openalex.org/W2127589659","https://openalex.org/W2132609289","https://openalex.org/W2133990480","https://openalex.org/W2134134392","https://openalex.org/W2145867197","https://openalex.org/W2161490380","https://openalex.org/W2165734539","https://openalex.org/W2165855670","https://openalex.org/W2176685020","https://openalex.org/W2183465848","https://openalex.org/W2187612371","https://openalex.org/W2250243742","https://openalex.org/W2251149908","https://openalex.org/W2251801815","https://openalex.org/W2251862917","https://openalex.org/W2396764674","https://openalex.org/W2460474657","https://openalex.org/W3007541353","https://openalex.org/W3018725618","https://openalex.org/W3106549878","https://openalex.org/W4237155282","https://openalex.org/W4285719527"],"related_works":["https://openalex.org/W4390411868","https://openalex.org/W2734357165","https://openalex.org/W2794189087","https://openalex.org/W2946888518","https://openalex.org/W143242002","https://openalex.org/W1960112736","https://openalex.org/W4226263902","https://openalex.org/W2800058963","https://openalex.org/W4381681023","https://openalex.org/W2903429053"],"abstract_inverted_index":{"In":[0,25],"social":[1,45],"media":[2],"communication,":[3],"multilingual":[4],"speakers":[5],"often":[6],"switch":[7],"between":[8,70],"languages,":[9],"and,":[10],"in":[11,32,55],"such":[12],"an":[13],"environment,":[14],"automatic":[15,38],"language":[16,39,43,81],"identification":[17,40,82],"becomes":[18],"both":[19],"a":[20,49,92],"necessary":[21],"and":[22,64,73,101,105,123,126],"challenging":[23],"task.":[24],"this":[26,85],"paper,":[27],"we":[28,53],"describe":[29,48],"our":[30],"work":[31],"progress":[33],"on":[34],"the":[35,42,56,115],"problem":[36],"of":[37,44,58],"for":[41],"media.":[46],"We":[47,75,112],"new":[50],"dataset":[51],"that":[52,66,114,127],"are":[54,89],"process":[57],"creating,":[59],"which":[60],"contains":[61],"Facebook":[62],"posts":[63],"comments":[65],"exhibit":[67],"code":[68],"mixing":[69],"Bengali,":[71],"English":[72],"Hindi.":[74],"also":[76],"present":[77],"some":[78],"preliminary":[79],"word-level":[80,98],"experiments":[83],"using":[84,108],"dataset.":[86],"Different":[87],"techniques":[88],"employed,":[90],"including":[91],"simple":[93],"unsupervised":[94],"dictionary-based":[95,116],"approach,":[96],"supervised":[97,121],"classification":[99,122],"with":[100],"without":[102],"contextual":[103,133],"clues,":[104],"sequence":[106,124],"labelling":[107],"Conditional":[109],"Random":[110],"Fields.":[111],"find":[113],"approach":[117],"is":[118,129],"surpassed":[119],"by":[120],"labelling,":[125],"it":[128],"important":[130],"to":[131],"take":[132],"clues":[134],"into":[135],"consideration.":[136]},"counts_by_year":[{"year":2026,"cited_by_count":3},{"year":2025,"cited_by_count":7},{"year":2024,"cited_by_count":9},{"year":2023,"cited_by_count":19},{"year":2022,"cited_by_count":24},{"year":2021,"cited_by_count":37},{"year":2020,"cited_by_count":31},{"year":2019,"cited_by_count":32},{"year":2018,"cited_by_count":48},{"year":2017,"cited_by_count":24},{"year":2016,"cited_by_count":23},{"year":2015,"cited_by_count":8},{"year":2014,"cited_by_count":3},{"year":2013,"cited_by_count":1}],"updated_date":"2026-05-18T08:16:58.900851","created_date":"2025-10-10T00:00:00"}
