{"id":"https://openalex.org/W2338833146","doi":"https://doi.org/10.3390/a9020027","title":"The Effect of Preprocessing on Arabic Document Categorization","display_name":"The Effect of Preprocessing on Arabic Document Categorization","publication_year":2016,"publication_date":"2016-04-18","ids":{"openalex":"https://openalex.org/W2338833146","doi":"https://doi.org/10.3390/a9020027","mag":"2338833146"},"language":"en","primary_location":{"id":"doi:10.3390/a9020027","is_oa":true,"landing_page_url":"https://doi.org/10.3390/a9020027","pdf_url":"https://www.mdpi.com/1999-4893/9/2/27/pdf?version=1460974899","source":{"id":"https://openalex.org/S190629608","display_name":"Algorithms","issn_l":"1999-4893","issn":["1999-4893"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310310987","host_organization_name":"Multidisciplinary Digital Publishing Institute","host_organization_lineage":["https://openalex.org/P4310310987"],"host_organization_lineage_names":["Multidisciplinary Digital Publishing Institute"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Algorithms","raw_type":"journal-article"},"type":"article","indexed_in":["crossref","doaj"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://www.mdpi.com/1999-4893/9/2/27/pdf?version=1460974899","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5000103199","display_name":"Abdullah Mohammed Ayedh","orcid":"https://orcid.org/0000-0002-0328-0613"},"institutions":[{"id":"https://openalex.org/I139660479","display_name":"Central South University","ror":"https://ror.org/00f1zfq44","country_code":"CN","type":"education","lineage":["https://openalex.org/I139660479"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Abdullah Ayedh","raw_affiliation_strings":["School of Information Science and Engineering, Central south University, Changsha 410000, China"],"raw_orcid":"https://orcid.org/0000-0002-0328-0613","affiliations":[{"raw_affiliation_string":"School of Information Science and Engineering, Central south University, Changsha 410000, China","institution_ids":["https://openalex.org/I139660479"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101892247","display_name":"Guanzheng Tan","orcid":null},"institutions":[{"id":"https://openalex.org/I139660479","display_name":"Central South University","ror":"https://ror.org/00f1zfq44","country_code":"CN","type":"education","lineage":["https://openalex.org/I139660479"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Guanzheng TAN","raw_affiliation_strings":["School of Information Science and Engineering, Central south University, Changsha 410000, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"School of Information Science and Engineering, Central south University, Changsha 410000, China","institution_ids":["https://openalex.org/I139660479"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5026061069","display_name":"Khaled Alwesabi","orcid":null},"institutions":[{"id":"https://openalex.org/I139660479","display_name":"Central South University","ror":"https://ror.org/00f1zfq44","country_code":"CN","type":"education","lineage":["https://openalex.org/I139660479"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Khaled Alwesabi","raw_affiliation_strings":["School of Information Science and Engineering, Central south University, Changsha 410000, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"School of Information Science and Engineering, Central south University, Changsha 410000, China","institution_ids":["https://openalex.org/I139660479"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5004465737","display_name":"Hamdi Rajeh","orcid":null},"institutions":[{"id":"https://openalex.org/I16609230","display_name":"Hunan University","ror":"https://ror.org/05htk5m33","country_code":"CN","type":"education","lineage":["https://openalex.org/I16609230"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Hamdi Rajeh","raw_affiliation_strings":["College of Computer Science and Electrical Engineering, Hunan University, Changsha 410000, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"College of Computer Science and Electrical Engineering, Hunan University, Changsha 410000, China","institution_ids":["https://openalex.org/I16609230"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5101892247"],"corresponding_institution_ids":["https://openalex.org/I139660479"],"apc_list":{"value":1400,"currency":"CHF","value_usd":1515},"apc_paid":{"value":1400,"currency":"CHF","value_usd":1515},"fwci":11.4848,"has_fulltext":false,"cited_by_count":75,"citation_normalized_percentile":{"value":0.98388001,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":95,"max":99},"biblio":{"volume":"9","issue":"2","first_page":"27","last_page":"27"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11550","display_name":"Text and Document Classification Technologies","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11550","display_name":"Text and Document Classification Technologies","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T13083","display_name":"Advanced Text Analysis Techniques","score":0.9897000193595886,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11644","display_name":"Spam and Phishing Detection","score":0.9782000184059143,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/preprocessor","display_name":"Preprocessor","score":0.8410198092460632},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7409382462501526},{"id":"https://openalex.org/keywords/support-vector-machine","display_name":"Support vector machine","score":0.7324557900428772},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.7097689509391785},{"id":"https://openalex.org/keywords/normalization","display_name":"Normalization (sociology)","score":0.6892373561859131},{"id":"https://openalex.org/keywords/naive-bayes-classifier","display_name":"Naive Bayes classifier","score":0.6435189843177795},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.6351888179779053},{"id":"https://openalex.org/keywords/categorization","display_name":"Categorization","score":0.6284995675086975},{"id":"https://openalex.org/keywords/data-pre-processing","display_name":"Data pre-processing","score":0.5300376415252686},{"id":"https://openalex.org/keywords/k-nearest-neighbors-algorithm","display_name":"k-nearest neighbors algorithm","score":0.5170441269874573},{"id":"https://openalex.org/keywords/arabic","display_name":"Arabic","score":0.49157389998435974},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.32614755630493164}],"concepts":[{"id":"https://openalex.org/C34736171","wikidata":"https://www.wikidata.org/wiki/Q918333","display_name":"Preprocessor","level":2,"score":0.8410198092460632},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7409382462501526},{"id":"https://openalex.org/C12267149","wikidata":"https://www.wikidata.org/wiki/Q282453","display_name":"Support vector machine","level":2,"score":0.7324557900428772},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.7097689509391785},{"id":"https://openalex.org/C136886441","wikidata":"https://www.wikidata.org/wiki/Q926129","display_name":"Normalization (sociology)","level":2,"score":0.6892373561859131},{"id":"https://openalex.org/C52001869","wikidata":"https://www.wikidata.org/wiki/Q812530","display_name":"Naive Bayes classifier","level":3,"score":0.6435189843177795},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.6351888179779053},{"id":"https://openalex.org/C94124525","wikidata":"https://www.wikidata.org/wiki/Q912550","display_name":"Categorization","level":2,"score":0.6284995675086975},{"id":"https://openalex.org/C10551718","wikidata":"https://www.wikidata.org/wiki/Q5227332","display_name":"Data pre-processing","level":2,"score":0.5300376415252686},{"id":"https://openalex.org/C113238511","wikidata":"https://www.wikidata.org/wiki/Q1071612","display_name":"k-nearest neighbors algorithm","level":2,"score":0.5170441269874573},{"id":"https://openalex.org/C96455323","wikidata":"https://www.wikidata.org/wiki/Q13955","display_name":"Arabic","level":2,"score":0.49157389998435974},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.32614755630493164},{"id":"https://openalex.org/C19165224","wikidata":"https://www.wikidata.org/wiki/Q23404","display_name":"Anthropology","level":1,"score":0.0},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C144024400","wikidata":"https://www.wikidata.org/wiki/Q21201","display_name":"Sociology","level":0,"score":0.0}],"mesh":[],"locations_count":3,"locations":[{"id":"doi:10.3390/a9020027","is_oa":true,"landing_page_url":"https://doi.org/10.3390/a9020027","pdf_url":"https://www.mdpi.com/1999-4893/9/2/27/pdf?version=1460974899","source":{"id":"https://openalex.org/S190629608","display_name":"Algorithms","issn_l":"1999-4893","issn":["1999-4893"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310310987","host_organization_name":"Multidisciplinary Digital Publishing Institute","host_organization_lineage":["https://openalex.org/P4310310987"],"host_organization_lineage_names":["Multidisciplinary Digital Publishing Institute"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Algorithms","raw_type":"journal-article"},{"id":"pmh:oai:doaj.org/article:2ea73f2f76244eee9e9928567d3f042c","is_oa":true,"landing_page_url":"https://doaj.org/article/2ea73f2f76244eee9e9928567d3f042c","pdf_url":null,"source":{"id":"https://openalex.org/S4306401280","display_name":"DOAJ (DOAJ: Directory of Open Access Journals)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by-sa","license_id":"https://openalex.org/licenses/cc-by-sa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Algorithms, Vol 9, Iss 2, p 27 (2016)","raw_type":"article"},{"id":"pmh:oai:mdpi.com:/1999-4893/9/2/27/","is_oa":true,"landing_page_url":"https://dx.doi.org/10.3390/a9020027","pdf_url":null,"source":{"id":"https://openalex.org/S4306400947","display_name":"MDPI (MDPI AG)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I4210097602","host_organization_name":"Multidisciplinary Digital Publishing Institute (Switzerland)","host_organization_lineage":["https://openalex.org/I4210097602"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Algorithms; Volume 9; Issue 2; Pages: 27","raw_type":"Text"}],"best_oa_location":{"id":"doi:10.3390/a9020027","is_oa":true,"landing_page_url":"https://doi.org/10.3390/a9020027","pdf_url":"https://www.mdpi.com/1999-4893/9/2/27/pdf?version=1460974899","source":{"id":"https://openalex.org/S190629608","display_name":"Algorithms","issn_l":"1999-4893","issn":["1999-4893"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310310987","host_organization_name":"Multidisciplinary Digital Publishing Institute","host_organization_lineage":["https://openalex.org/P4310310987"],"host_organization_lineage_names":["Multidisciplinary Digital Publishing Institute"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Algorithms","raw_type":"journal-article"},"sustainable_development_goals":[{"display_name":"Quality Education","id":"https://metadata.un.org/sdg/4","score":0.6200000047683716}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W2338833146.pdf"},"referenced_works_count":41,"referenced_works":["https://openalex.org/W30790492","https://openalex.org/W34482692","https://openalex.org/W34690091","https://openalex.org/W152021126","https://openalex.org/W1968781299","https://openalex.org/W1975099470","https://openalex.org/W1978018883","https://openalex.org/W1978394996","https://openalex.org/W1984545377","https://openalex.org/W1996824467","https://openalex.org/W2014545475","https://openalex.org/W2018079881","https://openalex.org/W2026868490","https://openalex.org/W2039240145","https://openalex.org/W2044259175","https://openalex.org/W2047390476","https://openalex.org/W2049040426","https://openalex.org/W2059503205","https://openalex.org/W2077679384","https://openalex.org/W2077871956","https://openalex.org/W2083308811","https://openalex.org/W2091669653","https://openalex.org/W2103333826","https://openalex.org/W2113485698","https://openalex.org/W2118020653","https://openalex.org/W2126972628","https://openalex.org/W2137082019","https://openalex.org/W2149684865","https://openalex.org/W2152318140","https://openalex.org/W2166030254","https://openalex.org/W2189150373","https://openalex.org/W2258458006","https://openalex.org/W2545890875","https://openalex.org/W2899559665","https://openalex.org/W3022751503","https://openalex.org/W4256133489","https://openalex.org/W6633643324","https://openalex.org/W6675969814","https://openalex.org/W6678969280","https://openalex.org/W6684608583","https://openalex.org/W6687411592"],"related_works":["https://openalex.org/W2989490741","https://openalex.org/W3092506759","https://openalex.org/W2367545121","https://openalex.org/W4248881655","https://openalex.org/W2482165163","https://openalex.org/W3010890513","https://openalex.org/W120741642","https://openalex.org/W138569904","https://openalex.org/W2390914021","https://openalex.org/W2389417819"],"abstract_inverted_index":{"Preprocessing":[0],"is":[1],"one":[2],"of":[3,21,27,74,81,90,102,128],"the":[4,19,25,28,66,75,88,95,107,112,126],"main":[5],"components":[6],"in":[7],"a":[8,62],"conventional":[9],"document":[10,91],"categorization":[11,92],"(DC)":[12],"framework.":[13],"This":[14],"paper":[15],"aims":[16],"to":[17],"highlight":[18],"effect":[20],"preprocessing":[22,59,82,133],"tasks":[23,83],"on":[24,54,65,87,94],"efficiency":[26],"Arabic":[29,55,76],"DC":[30],"system.":[31],"In":[32],"this":[33,103],"study,":[34],"three":[35],"classification":[36,67,99],"techniques":[37,60],"are":[38],"used,":[39],"namely,":[40],"naive":[41],"Bayes":[42],"(NB),":[43],"k-nearest":[44],"neighbor":[45],"(KNN),":[46],"and":[47,98,114,130],"support":[48],"vector":[49],"machine":[50],"(SVM).":[51],"Experimental":[52],"analysis":[53],"datasets":[56],"reveals":[57],"that":[58,106],"have":[61],"significant":[63,85],"impact":[64],"accuracy,":[68],"especially":[69],"with":[70],"complicated":[71],"morphological":[72],"structure":[73],"language.":[77],"Choosing":[78],"appropriate":[79],"combinations":[80],"provides":[84],"improvement":[86],"accuracy":[89],"depending":[93],"feature":[96],"size":[97],"techniques.":[100,116],"Findings":[101],"study":[104],"show":[105],"SVM":[108,118],"technique":[109,119],"has":[110],"outperformed":[111],"KNN":[113],"NB":[115],"The":[117],"achieved":[120],"96.74%":[121],"micro-F1":[122],"value":[123],"by":[124],"using":[125],"combination":[127],"normalization":[129],"stemming":[131],"as":[132],"tasks.":[134]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":3},{"year":2024,"cited_by_count":5},{"year":2023,"cited_by_count":11},{"year":2022,"cited_by_count":9},{"year":2021,"cited_by_count":7},{"year":2020,"cited_by_count":13},{"year":2019,"cited_by_count":9},{"year":2018,"cited_by_count":6},{"year":2017,"cited_by_count":8},{"year":2016,"cited_by_count":3}],"updated_date":"2026-05-22T06:13:13.366637","created_date":"2016-06-24T00:00:00"}
