{"id":"https://openalex.org/W4205500136","doi":"https://doi.org/10.1145/3501398","title":"Investigating the Effect of Preprocessing Arabic Text on Offensive Language and Hate Speech Detection","display_name":"Investigating the Effect of Preprocessing Arabic Text on Offensive Language and Hate Speech Detection","publication_year":2022,"publication_date":"2022-01-19","ids":{"openalex":"https://openalex.org/W4205500136","doi":"https://doi.org/10.1145/3501398"},"language":"en","primary_location":{"id":"doi:10.1145/3501398","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3501398","pdf_url":null,"source":{"id":"https://openalex.org/S4306421405","display_name":"ACM Transactions on Asian and Low-Resource Language Information Processing","issn_l":"2375-4699","issn":["2375-4699","2375-4702"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM Transactions on Asian and Low-Resource Language Information Processing","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5011174289","display_name":"Fatemah Husain","orcid":"https://orcid.org/0000-0003-3470-229X"},"institutions":[{"id":"https://openalex.org/I36721946","display_name":"Kuwait University","ror":"https://ror.org/021e5j056","country_code":"KW","type":"education","lineage":["https://openalex.org/I36721946"]}],"countries":["KW"],"is_corresponding":true,"raw_author_name":"Fatemah Husain","raw_affiliation_strings":["Kuwait University, Safat, Kuwait"],"affiliations":[{"raw_affiliation_string":"Kuwait University, Safat, Kuwait","institution_ids":["https://openalex.org/I36721946"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5070926324","display_name":"\u00d6zlem Uzuner","orcid":"https://orcid.org/0000-0001-8011-9850"},"institutions":[{"id":"https://openalex.org/I162714631","display_name":"George Mason University","ror":"https://ror.org/02jqj7156","country_code":"US","type":"education","lineage":["https://openalex.org/I162714631"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Ozlem Uzuner","raw_affiliation_strings":["George Mason University, Fairfax, VA, USA"],"affiliations":[{"raw_affiliation_string":"George Mason University, Fairfax, VA, USA","institution_ids":["https://openalex.org/I162714631"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5011174289"],"corresponding_institution_ids":["https://openalex.org/I36721946"],"apc_list":null,"apc_paid":null,"fwci":4.9976,"has_fulltext":false,"cited_by_count":40,"citation_normalized_percentile":{"value":0.95698826,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":98,"max":100},"biblio":{"volume":"21","issue":"4","first_page":"1","last_page":"20"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12262","display_name":"Hate Speech and Cyberbullying Detection","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T12262","display_name":"Hate Speech and Cyberbullying Detection","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11644","display_name":"Spam and Phishing Detection","score":0.9666000008583069,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T13959","display_name":"Swearing, Euphemism, Multilingualism","score":0.9501000046730042,"subfield":{"id":"https://openalex.org/subfields/3315","display_name":"Communication"},"field":{"id":"https://openalex.org/fields/33","display_name":"Social Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/preprocessor","display_name":"Preprocessor","score":0.828143835067749},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7849358320236206},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.7314358353614807},{"id":"https://openalex.org/keywords/normalization","display_name":"Normalization (sociology)","score":0.6191774010658264},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.6105881333351135},{"id":"https://openalex.org/keywords/offensive","display_name":"Offensive","score":0.5106509327888489},{"id":"https://openalex.org/keywords/classifier","display_name":"Classifier (UML)","score":0.49507275223731995},{"id":"https://openalex.org/keywords/arabic","display_name":"Arabic","score":0.45833900570869446},{"id":"https://openalex.org/keywords/data-pre-processing","display_name":"Data pre-processing","score":0.41961681842803955},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.37393367290496826},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.34719812870025635},{"id":"https://openalex.org/keywords/linguistics","display_name":"Linguistics","score":0.10493886470794678},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.0724707841873169}],"concepts":[{"id":"https://openalex.org/C34736171","wikidata":"https://www.wikidata.org/wiki/Q918333","display_name":"Preprocessor","level":2,"score":0.828143835067749},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7849358320236206},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.7314358353614807},{"id":"https://openalex.org/C136886441","wikidata":"https://www.wikidata.org/wiki/Q926129","display_name":"Normalization (sociology)","level":2,"score":0.6191774010658264},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.6105881333351135},{"id":"https://openalex.org/C176856949","wikidata":"https://www.wikidata.org/wiki/Q2001676","display_name":"Offensive","level":2,"score":0.5106509327888489},{"id":"https://openalex.org/C95623464","wikidata":"https://www.wikidata.org/wiki/Q1096149","display_name":"Classifier (UML)","level":2,"score":0.49507275223731995},{"id":"https://openalex.org/C96455323","wikidata":"https://www.wikidata.org/wiki/Q13955","display_name":"Arabic","level":2,"score":0.45833900570869446},{"id":"https://openalex.org/C10551718","wikidata":"https://www.wikidata.org/wiki/Q5227332","display_name":"Data pre-processing","level":2,"score":0.41961681842803955},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.37393367290496826},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.34719812870025635},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.10493886470794678},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.0724707841873169},{"id":"https://openalex.org/C19165224","wikidata":"https://www.wikidata.org/wiki/Q23404","display_name":"Anthropology","level":1,"score":0.0},{"id":"https://openalex.org/C42475967","wikidata":"https://www.wikidata.org/wiki/Q194292","display_name":"Operations research","level":1,"score":0.0},{"id":"https://openalex.org/C144024400","wikidata":"https://www.wikidata.org/wiki/Q21201","display_name":"Sociology","level":0,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3501398","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3501398","pdf_url":null,"source":{"id":"https://openalex.org/S4306421405","display_name":"ACM Transactions on Asian and Low-Resource Language Information Processing","issn_l":"2375-4699","issn":["2375-4699","2375-4702"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM Transactions on Asian and Low-Resource Language Information Processing","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/16","score":0.6299999952316284,"display_name":"Peace, Justice and strong institutions"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":14,"referenced_works":["https://openalex.org/W2104463314","https://openalex.org/W2127613773","https://openalex.org/W2642356091","https://openalex.org/W2737990573","https://openalex.org/W2767784948","https://openalex.org/W2913798560","https://openalex.org/W2972463128","https://openalex.org/W2977739081","https://openalex.org/W2979469769","https://openalex.org/W3021743785","https://openalex.org/W3025320888","https://openalex.org/W3116641301","https://openalex.org/W3134728912","https://openalex.org/W4211260480"],"related_works":["https://openalex.org/W2989490741","https://openalex.org/W2367545121","https://openalex.org/W4248881655","https://openalex.org/W2482165163","https://openalex.org/W3092506759","https://openalex.org/W138569904","https://openalex.org/W3010890513","https://openalex.org/W2390914021","https://openalex.org/W2389417819","https://openalex.org/W2368524271"],"abstract_inverted_index":{"Preprocessing":[0],"of":[1,26,39,46,49,53,64,92,101,128,138],"input":[2],"text":[3,10,88],"can":[4,168],"play":[5],"a":[6,90],"key":[7],"role":[8],"in":[9,103,135],"classification":[11],"by":[12],"reducing":[13],"dimensionality":[14],"and":[15,71,80,89,116,144,179],"removing":[16,76],"unnecessary":[17],"content.":[18],"This":[19],"study":[20],"aims":[21],"to":[22,41,59,67,124],"investigate":[23],"the":[24,126,136],"impact":[25,127],"preprocessing":[27,36,95,139],"on":[28,140,145,152,172],"Arabic":[29,42,50,58],"offensive":[30],"language":[31],"classification.":[32],"We":[33,83,97],"explore":[34],"six":[35,94],"techniques:":[37],"conversion":[38,63],"emojis":[40],"textual":[43],"labels,":[44],"normalization":[45,52],"different":[47,99],"forms":[48],"letters,":[51],"selected":[54,65],"nouns":[55],"from":[56,120,157,170],"dialectal":[57],"Modern":[60],"Standard":[61],"Arabic,":[62],"hyponyms":[66],"hypernyms,":[68],"hashtag":[69],"segmentation,":[70],"basic":[72],"cleaning":[73],"such":[74],"as":[75],"numbers,":[77],"kashidas,":[78],"diacritics,":[79],"HTML":[81],"tags.":[82],"also":[84],"experiment":[85],"with":[86],"raw":[87],"combination":[91],"all":[93],"techniques.":[96],"apply":[98],"types":[100],"classifiers":[102,163],"our":[104],"experiments":[105],"including":[106],"traditional":[107,160],"machine":[108,111,161],"learning,":[109,112],"ensemble":[110],"Artificial":[113],"Neural":[114],"Networks,":[115],"Bidirectional":[117],"Encoder":[118],"Representations":[119],"Transformers":[121],"(BERT)-based":[122],"models":[123],"analyze":[125],"preprocessing.":[129],"Our":[130],"results":[131,167],"demonstrate":[132],"significant":[133],"variations":[134],"effects":[137],"each":[141,146],"classifier":[142],"type":[143],"dataset.":[147],"Classifiers":[148],"that":[149,175],"are":[150],"based":[151],"BERT":[153],"do":[154],"not":[155],"benefit":[156,169],"preprocessing,":[158],"while":[159],"learning":[162],"do.":[164],"However,":[165],"these":[166],"validation":[171],"larger":[173],"datasets":[174],"cover":[176],"broader":[177],"domains":[178],"dialects.":[180]},"counts_by_year":[{"year":2026,"cited_by_count":4},{"year":2025,"cited_by_count":14},{"year":2024,"cited_by_count":10},{"year":2023,"cited_by_count":6},{"year":2022,"cited_by_count":6}],"updated_date":"2026-04-13T07:58:08.660418","created_date":"2025-10-10T00:00:00"}
