{"id":"https://openalex.org/W4408944674","doi":"https://doi.org/10.1145/3726866","title":"Improving Code-Mixed Hate Detection by Native Sample Mixing: A Case Study for Hindi-English Code-Mixed Scenario","display_name":"Improving Code-Mixed Hate Detection by Native Sample Mixing: A Case Study for Hindi-English Code-Mixed Scenario","publication_year":2025,"publication_date":"2025-03-28","ids":{"openalex":"https://openalex.org/W4408944674","doi":"https://doi.org/10.1145/3726866"},"language":"en","primary_location":{"id":"doi:10.1145/3726866","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3726866","pdf_url":null,"source":{"id":"https://openalex.org/S4306421405","display_name":"ACM Transactions on Asian and Low-Resource Language Information Processing","issn_l":"2375-4699","issn":["2375-4699","2375-4702"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM Transactions on Asian and Low-Resource Language Information Processing","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"hybrid","oa_url":"https://doi.org/10.1145/3726866","any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5099008126","display_name":"Debajyoti Mazumder","orcid":null},"institutions":[{"id":"https://openalex.org/I288749910","display_name":"Indian Institute of Science Education and Research, Bhopal","ror":"https://ror.org/02rb21j89","country_code":"IN","type":"education","lineage":["https://openalex.org/I288749910"]}],"countries":["IN"],"is_corresponding":true,"raw_author_name":"Debajyoti Mazumder","raw_affiliation_strings":["DSE, IISER, Bhopal, Bhopal, India","IISER Bhopal,  Bhopal, India"],"raw_orcid":"https://orcid.org/0009-0004-2389-9204","affiliations":[{"raw_affiliation_string":"DSE, IISER, Bhopal, Bhopal, India","institution_ids":["https://openalex.org/I288749910"]},{"raw_affiliation_string":"IISER Bhopal,  Bhopal, India","institution_ids":["https://openalex.org/I288749910"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Aakash Kumar","orcid":"https://orcid.org/0009-0008-9735-1507"},"institutions":[{"id":"https://openalex.org/I288749910","display_name":"Indian Institute of Science Education and Research, Bhopal","ror":"https://ror.org/02rb21j89","country_code":"IN","type":"education","lineage":["https://openalex.org/I288749910"]}],"countries":["IN"],"is_corresponding":false,"raw_author_name":"Aakash Kumar","raw_affiliation_strings":["DSE, IISER, Bhopal, Bhopal, India","DSE, IISER Bhopal, Bhopal, India"],"raw_orcid":"https://orcid.org/0009-0008-9735-1507","affiliations":[{"raw_affiliation_string":"DSE, IISER, Bhopal, Bhopal, India","institution_ids":["https://openalex.org/I288749910"]},{"raw_affiliation_string":"DSE, IISER Bhopal, Bhopal, India","institution_ids":["https://openalex.org/I288749910"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5025398442","display_name":"Jasabanta Patro","orcid":"https://orcid.org/0000-0003-2461-9679"},"institutions":[{"id":"https://openalex.org/I288749910","display_name":"Indian Institute of Science Education and Research, Bhopal","ror":"https://ror.org/02rb21j89","country_code":"IN","type":"education","lineage":["https://openalex.org/I288749910"]}],"countries":["IN"],"is_corresponding":false,"raw_author_name":"Jasabanta Patro","raw_affiliation_strings":["DSE, IISER, Bhopal, Bhopal, India","DSE, IISER Bhopal, Bhopal, India"],"raw_orcid":"https://orcid.org/0000-0003-2461-9679","affiliations":[{"raw_affiliation_string":"DSE, IISER, Bhopal, Bhopal, India","institution_ids":["https://openalex.org/I288749910"]},{"raw_affiliation_string":"DSE, IISER Bhopal, Bhopal, India","institution_ids":["https://openalex.org/I288749910"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5099008126"],"corresponding_institution_ids":["https://openalex.org/I288749910"],"apc_list":null,"apc_paid":null,"fwci":6.5198,"has_fulltext":false,"cited_by_count":3,"citation_normalized_percentile":{"value":0.95854198,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":95,"max":98},"biblio":{"volume":"24","issue":"5","first_page":"1","last_page":"21"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12262","display_name":"Hate Speech and Cyberbullying Detection","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T12262","display_name":"Hate Speech and Cyberbullying Detection","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/code-mixing","display_name":"Code-mixing","score":0.7740964889526367},{"id":"https://openalex.org/keywords/hindi","display_name":"Hindi","score":0.769339919090271},{"id":"https://openalex.org/keywords/code","display_name":"Code (set theory)","score":0.6268066167831421},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.5837454795837402},{"id":"https://openalex.org/keywords/sample","display_name":"Sample (material)","score":0.531351625919342},{"id":"https://openalex.org/keywords/code-switching","display_name":"Code-switching","score":0.5202837586402893},{"id":"https://openalex.org/keywords/mixing","display_name":"Mixing (physics)","score":0.4807417392730713},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.40514326095581055},{"id":"https://openalex.org/keywords/linguistics","display_name":"Linguistics","score":0.31951192021369934},{"id":"https://openalex.org/keywords/programming-language","display_name":"Programming language","score":0.22477087378501892},{"id":"https://openalex.org/keywords/physics","display_name":"Physics","score":0.1139097809791565}],"concepts":[{"id":"https://openalex.org/C2778598016","wikidata":"https://www.wikidata.org/wiki/Q3201279","display_name":"Code-mixing","level":3,"score":0.7740964889526367},{"id":"https://openalex.org/C519982507","wikidata":"https://www.wikidata.org/wiki/Q1568","display_name":"Hindi","level":2,"score":0.769339919090271},{"id":"https://openalex.org/C2776760102","wikidata":"https://www.wikidata.org/wiki/Q5139990","display_name":"Code (set theory)","level":3,"score":0.6268066167831421},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.5837454795837402},{"id":"https://openalex.org/C198531522","wikidata":"https://www.wikidata.org/wiki/Q485146","display_name":"Sample (material)","level":2,"score":0.531351625919342},{"id":"https://openalex.org/C18552078","wikidata":"https://www.wikidata.org/wiki/Q255615","display_name":"Code-switching","level":2,"score":0.5202837586402893},{"id":"https://openalex.org/C138777275","wikidata":"https://www.wikidata.org/wiki/Q6884054","display_name":"Mixing (physics)","level":2,"score":0.4807417392730713},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.40514326095581055},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.31951192021369934},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.22477087378501892},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.1139097809791565},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C97355855","wikidata":"https://www.wikidata.org/wiki/Q11473","display_name":"Thermodynamics","level":1,"score":0.0},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3726866","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3726866","pdf_url":null,"source":{"id":"https://openalex.org/S4306421405","display_name":"ACM Transactions on Asian and Low-Resource Language Information Processing","issn_l":"2375-4699","issn":["2375-4699","2375-4702"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM Transactions on Asian and Low-Resource Language Information Processing","raw_type":"journal-article"}],"best_oa_location":{"id":"doi:10.1145/3726866","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3726866","pdf_url":null,"source":{"id":"https://openalex.org/S4306421405","display_name":"ACM Transactions on Asian and Low-Resource Language Information Processing","issn_l":"2375-4699","issn":["2375-4699","2375-4702"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM Transactions on Asian and Low-Resource Language Information Processing","raw_type":"journal-article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":44,"referenced_works":["https://openalex.org/W1965555277","https://openalex.org/W1973253288","https://openalex.org/W2044173330","https://openalex.org/W2473555522","https://openalex.org/W2540646130","https://openalex.org/W2563826943","https://openalex.org/W2599674900","https://openalex.org/W2734862619","https://openalex.org/W2740168486","https://openalex.org/W2740471031","https://openalex.org/W2747187574","https://openalex.org/W2804569870","https://openalex.org/W2805807672","https://openalex.org/W2887782043","https://openalex.org/W2898401058","https://openalex.org/W2901236145","https://openalex.org/W2919234748","https://openalex.org/W2948947170","https://openalex.org/W2949340035","https://openalex.org/W2954793188","https://openalex.org/W2963341956","https://openalex.org/W2971050273","https://openalex.org/W2997887173","https://openalex.org/W3009899658","https://openalex.org/W3035390927","https://openalex.org/W3091315987","https://openalex.org/W3099919888","https://openalex.org/W3118485687","https://openalex.org/W3172559340","https://openalex.org/W3173380736","https://openalex.org/W3174882277","https://openalex.org/W3197986154","https://openalex.org/W3201295503","https://openalex.org/W4239019441","https://openalex.org/W4285716843","https://openalex.org/W4306873631","https://openalex.org/W4317566631","https://openalex.org/W4319349346","https://openalex.org/W4385741611","https://openalex.org/W4389519813","https://openalex.org/W4389523730","https://openalex.org/W4390939283","https://openalex.org/W4391156274","https://openalex.org/W4393200366"],"related_works":["https://openalex.org/W4205878333","https://openalex.org/W2058598214","https://openalex.org/W2755033583","https://openalex.org/W2508660901","https://openalex.org/W1485736417","https://openalex.org/W4407849766","https://openalex.org/W4378378905","https://openalex.org/W3166060075","https://openalex.org/W4389308877","https://openalex.org/W3181795562"],"abstract_inverted_index":{"Hate":[0],"detection":[1,112],"has":[2],"long":[3],"been":[4],"a":[5,17,121,147,204],"challenging":[6],"task":[7,13],"for":[8,54,156,186],"the":[9,21,25,28,36,55,78,96,102,106,142,153,157,161,173,182,208,227,232,258,265],"NLP":[10,103],"community.":[11],"The":[12],"becomes":[14],"complex":[15],"in":[16,77,86,113,120,172,178,220,231],"code-mixed":[18,45,87,122,144,174,187,201,233,253],"environment":[19],"because":[20],"models":[22,83],"must":[23],"understand":[24],"context":[26],"and":[27,235,260],"hate":[29,46,50,66,85,111,170,188,202,228,239],"expressed":[30],"through":[31,136],"language":[32,65,69,82,98],"alteration.":[33],"Compared":[34],"to":[35,126,132,198,203,251,263],"monolingual":[37],"setup,":[38],"we":[39,61,151,164],"see":[40],"much":[41,250],"less":[42],"work":[43],"on":[44,95,110,226],"as":[47,146,150],"large-scale":[48],"annotated":[49],"corpora":[51],"are":[52],"unavailable":[53],"study.":[56],"To":[57],"overcome":[58],"this":[59,134],"bottleneck,":[60],"propose":[62],"using":[63],"native":[64,71,97,169,194,216,246],"samples":[67,72,171,195,217,247],"(native":[68],"samples/":[70],"hereafter).":[73],"We":[74,140,255],"hypothesise":[75],"that":[76],"era":[79],"of":[80,108,160,184,210],"multilingual":[81],"(MLMs),":[84],"settings":[88],"can":[89],"be":[90,127,199],"detected":[91],"by":[92],"majorly":[93],"relying":[94],"samples.":[99],"Even":[100],"though":[101],"literature":[104],"reports":[105],"effectiveness":[107],"MLMs":[109,185,191,222],"many":[114],"cross-lingual":[115],"settings,":[116],"their":[117],"extensive":[118],"evaluation":[119],"scenario":[123],"is":[124,240],"yet":[125],"done.":[128],"This":[129],"article":[130],"attempts":[131],"fill":[133],"gap":[135],"rigorous":[137],"empirical":[138],"experiments.":[139],"considered":[141],"Hindi-English":[143],"setup":[145],"case":[148],"study":[149],"have":[152,256],"linguistic":[154],"expertise":[155],"same.":[158],"Some":[159],"interesting":[162],"observations":[163],"got":[165],"are:":[166],"(i)":[167],"adding":[168],"training":[175],"set,":[176],"even":[177],"small":[179],"quantity,":[180],"improved":[181],"performance":[183],"detection,":[189],"(ii)":[190],"trained":[192],"with":[193],"alone":[196],"observed":[197],"detecting":[200],"large":[205],"extent,":[206],"(iii)":[207],"visualisation":[209],"attention":[211],"scores":[212],"revealed":[213],"that,":[214],"when":[215,238],"were":[218],"included":[219],"training,":[221],"could":[223],"better":[224],"focus":[225],"emitting":[229],"words":[230],"context,":[234],"(iv)":[236],"finally,":[237],"subjective":[241],"or":[242],"sarcastic,":[243],"naively":[244],"mixing":[245],"doesn\u2019t":[248],"help":[249],"detect":[252],"hate.":[254],"released":[257],"data":[259],"code":[261],"repository":[262],"reproduce":[264],"reported":[266],"results.":[267],"1":[268]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":2}],"updated_date":"2025-12-21T01:58:51.020947","created_date":"2025-10-10T00:00:00"}
