{"id":"https://openalex.org/W4388936673","doi":"https://doi.org/10.1109/access.2023.3336311","title":"Data Augmentation Using Transformers and Similarity Measures for Improving Arabic Text Classification","display_name":"Data Augmentation Using Transformers and Similarity Measures for Improving Arabic Text Classification","publication_year":2023,"publication_date":"2023-01-01","ids":{"openalex":"https://openalex.org/W4388936673","doi":"https://doi.org/10.1109/access.2023.3336311"},"language":"en","primary_location":{"id":"doi:10.1109/access.2023.3336311","is_oa":true,"landing_page_url":"https://doi.org/10.1109/access.2023.3336311","pdf_url":"https://ieeexplore.ieee.org/ielx7/6287639/6514899/10328600.pdf","source":{"id":"https://openalex.org/S2485537415","display_name":"IEEE Access","issn_l":"2169-3536","issn":["2169-3536"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Access","raw_type":"journal-article"},"type":"article","indexed_in":["crossref","doaj"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://ieeexplore.ieee.org/ielx7/6287639/6514899/10328600.pdf","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5084756913","display_name":"Dania Refai","orcid":"https://orcid.org/0000-0002-4599-8797"},"institutions":[{"id":"https://openalex.org/I158749337","display_name":"Princess Sumaya University for Technology","ror":"https://ror.org/01jy46q10","country_code":"JO","type":"education","lineage":["https://openalex.org/I158749337"]}],"countries":["JO"],"is_corresponding":false,"raw_author_name":"Dania Refai","raw_affiliation_strings":["Computer Science Department, Princess Sumaya University for Technology, Amman, Jordan"],"raw_orcid":"https://orcid.org/0000-0002-4599-8797","affiliations":[{"raw_affiliation_string":"Computer Science Department, Princess Sumaya University for Technology, Amman, Jordan","institution_ids":["https://openalex.org/I158749337"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5085771784","display_name":"Saleh M. Abu-Soud","orcid":"https://orcid.org/0000-0001-9144-9409"},"institutions":[{"id":"https://openalex.org/I158749337","display_name":"Princess Sumaya University for Technology","ror":"https://ror.org/01jy46q10","country_code":"JO","type":"education","lineage":["https://openalex.org/I158749337"]}],"countries":["JO"],"is_corresponding":false,"raw_author_name":"Saleh Abu-Soud","raw_affiliation_strings":["Data Science Department, Princess Sumaya University for Technology, Amman, Jordan"],"raw_orcid":"https://orcid.org/0000-0001-9144-9409","affiliations":[{"raw_affiliation_string":"Data Science Department, Princess Sumaya University for Technology, Amman, Jordan","institution_ids":["https://openalex.org/I158749337"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5078301634","display_name":"Mohammad J. Abdel\u2010Rahman","orcid":"https://orcid.org/0000-0001-5788-6656"},"institutions":[{"id":"https://openalex.org/I158749337","display_name":"Princess Sumaya University for Technology","ror":"https://ror.org/01jy46q10","country_code":"JO","type":"education","lineage":["https://openalex.org/I158749337"]},{"id":"https://openalex.org/I859038795","display_name":"Virginia Tech","ror":"https://ror.org/02smfhw86","country_code":"US","type":"education","lineage":["https://openalex.org/I859038795"]}],"countries":["JO","US"],"is_corresponding":false,"raw_author_name":"Mohammad J. Abdel-Rahman","raw_affiliation_strings":["Data Science Department, Princess Sumaya University for Technology, Amman, Jordan","Electrical and Computer Engineering Department, Virginia Tech, Blacksburg, VA, USA"],"raw_orcid":"https://orcid.org/0000-0001-5788-6656","affiliations":[{"raw_affiliation_string":"Data Science Department, Princess Sumaya University for Technology, Amman, Jordan","institution_ids":["https://openalex.org/I158749337"]},{"raw_affiliation_string":"Electrical and Computer Engineering Department, Virginia Tech, Blacksburg, VA, USA","institution_ids":["https://openalex.org/I859038795"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":3,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":{"value":1850,"currency":"USD","value_usd":1850},"apc_paid":{"value":1850,"currency":"USD","value_usd":1850},"fwci":4.079,"has_fulltext":true,"cited_by_count":25,"citation_normalized_percentile":{"value":0.95146055,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":98,"max":100},"biblio":{"volume":"11","issue":null,"first_page":"132516","last_page":"132531"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10664","display_name":"Sentiment Analysis and Opinion Mining","score":0.9991999864578247,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10664","display_name":"Sentiment Analysis and Opinion Mining","score":0.9991999864578247,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9987999796867371,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11550","display_name":"Text and Document Classification Technologies","score":0.9987999796867371,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8228150010108948},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.6437097787857056},{"id":"https://openalex.org/keywords/jaccard-index","display_name":"Jaccard index","score":0.5625736117362976},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.5220817923545837},{"id":"https://openalex.org/keywords/arabic","display_name":"Arabic","score":0.5050578713417053},{"id":"https://openalex.org/keywords/transformer","display_name":"Transformer","score":0.4801768362522125},{"id":"https://openalex.org/keywords/cosine-similarity","display_name":"Cosine similarity","score":0.4504292607307434},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.3661140203475952},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.3544217348098755},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.3542286455631256}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8228150010108948},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6437097787857056},{"id":"https://openalex.org/C203519979","wikidata":"https://www.wikidata.org/wiki/Q865360","display_name":"Jaccard index","level":3,"score":0.5625736117362976},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.5220817923545837},{"id":"https://openalex.org/C96455323","wikidata":"https://www.wikidata.org/wiki/Q13955","display_name":"Arabic","level":2,"score":0.5050578713417053},{"id":"https://openalex.org/C66322947","wikidata":"https://www.wikidata.org/wiki/Q11658","display_name":"Transformer","level":3,"score":0.4801768362522125},{"id":"https://openalex.org/C2780762811","wikidata":"https://www.wikidata.org/wiki/Q1784941","display_name":"Cosine similarity","level":3,"score":0.4504292607307434},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.3661140203475952},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.3544217348098755},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.3542286455631256},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.0},{"id":"https://openalex.org/C165801399","wikidata":"https://www.wikidata.org/wiki/Q25428","display_name":"Voltage","level":2,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1109/access.2023.3336311","is_oa":true,"landing_page_url":"https://doi.org/10.1109/access.2023.3336311","pdf_url":"https://ieeexplore.ieee.org/ielx7/6287639/6514899/10328600.pdf","source":{"id":"https://openalex.org/S2485537415","display_name":"IEEE Access","issn_l":"2169-3536","issn":["2169-3536"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Access","raw_type":"journal-article"},{"id":"pmh:oai:doaj.org/article:d4b36ed458034b7aa1767aa4e34a39e9","is_oa":true,"landing_page_url":"https://doaj.org/article/d4b36ed458034b7aa1767aa4e34a39e9","pdf_url":null,"source":{"id":"https://openalex.org/S4306401280","display_name":"DOAJ (DOAJ: Directory of Open Access Journals)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by-sa","license_id":"https://openalex.org/licenses/cc-by-sa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"IEEE Access, Vol 11, Pp 132516-132531 (2023)","raw_type":"article"}],"best_oa_location":{"id":"doi:10.1109/access.2023.3336311","is_oa":true,"landing_page_url":"https://doi.org/10.1109/access.2023.3336311","pdf_url":"https://ieeexplore.ieee.org/ielx7/6287639/6514899/10328600.pdf","source":{"id":"https://openalex.org/S2485537415","display_name":"IEEE Access","issn_l":"2169-3536","issn":["2169-3536"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Access","raw_type":"journal-article"},"sustainable_development_goals":[{"display_name":"Quality Education","id":"https://metadata.un.org/sdg/4","score":0.6399999856948853}],"awards":[],"funders":[{"id":"https://openalex.org/F4320318285","display_name":"New York Institute of Technology","ror":"https://ror.org/01bghzb51"}],"has_content":{"pdf":true,"grobid_xml":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4388936673.pdf","grobid_xml":"https://content.openalex.org/works/W4388936673.grobid-xml"},"referenced_works_count":68,"referenced_works":["https://openalex.org/W1965657003","https://openalex.org/W1973965874","https://openalex.org/W2035642982","https://openalex.org/W2101105183","https://openalex.org/W2228486207","https://openalex.org/W2239389665","https://openalex.org/W2250594687","https://openalex.org/W2398463581","https://openalex.org/W2471147443","https://openalex.org/W2473945007","https://openalex.org/W2782223840","https://openalex.org/W2937423263","https://openalex.org/W2953343412","https://openalex.org/W2954996726","https://openalex.org/W2964236337","https://openalex.org/W2970641574","https://openalex.org/W2971296908","https://openalex.org/W2991266149","https://openalex.org/W3008110149","https://openalex.org/W3034336785","https://openalex.org/W3037013468","https://openalex.org/W3048162013","https://openalex.org/W3097513514","https://openalex.org/W3105625590","https://openalex.org/W3113862052","https://openalex.org/W3115267989","https://openalex.org/W3115908473","https://openalex.org/W3148105069","https://openalex.org/W3154410208","https://openalex.org/W3154741768","https://openalex.org/W3170305303","https://openalex.org/W3171388604","https://openalex.org/W3173569225","https://openalex.org/W3174828871","https://openalex.org/W3176169354","https://openalex.org/W3181034584","https://openalex.org/W3182164106","https://openalex.org/W3201915713","https://openalex.org/W4200608161","https://openalex.org/W4210336044","https://openalex.org/W4245461916","https://openalex.org/W4283814918","https://openalex.org/W4287079360","https://openalex.org/W4287661507","https://openalex.org/W4287728395","https://openalex.org/W4306955484","https://openalex.org/W4319791992","https://openalex.org/W4367858557","https://openalex.org/W4385245566","https://openalex.org/W4385574306","https://openalex.org/W6677704767","https://openalex.org/W6689044022","https://openalex.org/W6697261853","https://openalex.org/W6752906821","https://openalex.org/W6758993734","https://openalex.org/W6765939562","https://openalex.org/W6780673046","https://openalex.org/W6780739316","https://openalex.org/W6784056776","https://openalex.org/W6787958253","https://openalex.org/W6788007171","https://openalex.org/W6788242128","https://openalex.org/W6789924496","https://openalex.org/W6793723455","https://openalex.org/W6793962581","https://openalex.org/W6796698822","https://openalex.org/W6798051994","https://openalex.org/W6849158869"],"related_works":["https://openalex.org/W4381948805","https://openalex.org/W4214483597","https://openalex.org/W4396220545","https://openalex.org/W4388145912","https://openalex.org/W1437580529","https://openalex.org/W2307895033","https://openalex.org/W3048951355","https://openalex.org/W4313532769","https://openalex.org/W3215923396","https://openalex.org/W4286850906"],"abstract_inverted_index":{"The":[0,111,152,166,177],"performance":[1,55,146],"of":[2,12,118,147],"learning":[3],"models":[4],"heavily":[5],"relies":[6],"on":[7,78,138,156,190],"the":[8,17,41,74,99,105,108,125,133,144,148,181,185],"availability":[9],"and":[10,48,56,122,129,164,174,209],"adequacy":[11,19],"training":[13],"data.":[14],"To":[15],"address":[16],"dataset":[18,46],"issue,":[20],"researchers":[21],"have":[22,70],"extensively":[23],"explored":[24,71],"data":[25,35],"augmentation":[26,109],"(DA)":[27],"as":[28,82],"a":[29,92],"promising":[30],"approach.":[31],"DA":[32,72,95],"generates":[33],"new":[34,93],"instances":[36],"through":[37],"transformations":[38],"applied":[39],"to":[40,142],"available":[42],"data,":[43],"thereby":[44],"increasing":[45],"size":[47],"variability.":[49],"This":[50],"approach":[51],"has":[52],"enhanced":[53,184],"model":[54],"accuracy,":[57],"particularly":[58],"in":[59,64,116,170,196,201,204,207,211],"addressing":[60],"class":[61],"imbalance":[62],"problems":[63],"classification":[65,140,145,189],"tasks.":[66],"However,":[67],"few":[68],"studies":[69],"for":[73,107],"Arabic":[75,94,150,159,186],"language,":[76],"relying":[77],"traditional":[79],"approaches":[80],"such":[81],"paraphrasing":[83],"or":[84],"noising-based":[85],"techniques.":[86],"In":[87],"this":[88],"paper,":[89],"we":[90],"propose":[91],"method":[96],"that":[97,180],"employs":[98],"recent":[100],"powerful":[101],"modeling":[102],"technique,":[103],"namely":[104],"AraGPT-2,":[106],"process.":[110],"generated":[112],"sentences":[113],"are":[114],"evaluated":[115],"terms":[117],"context,":[119],"semantics,":[120],"diversity,":[121],"novelty":[123],"using":[124],"Euclidean,":[126],"cosine,":[127],"Jaccard,":[128],"BLEU":[130],"distances.":[131],"Finally,":[132],"AraBERT":[134],"transformer":[135],"is":[136],"used":[137],"sentiment":[139,158,187],"tasks":[141],"evaluate":[143],"augmented":[149],"dataset.":[151],"experiments":[153],"were":[154],"conducted":[155],"four":[157],"datasets:":[160],"AraSarcasm,":[161,202],"ASTD,":[162,205],"ATT,":[163,208],"MOVIE.":[165,212],"selected":[167],"datasets":[168,192],"vary":[169],"size,":[171],"label":[172],"number,":[173],"unbalanced":[175],"classes.":[176],"results":[178],"show":[179],"proposed":[182],"methodology":[183],"text":[188],"all":[191],"with":[193],"an":[194],"increase":[195],"F1":[197],"score":[198],"by":[199],"7%":[200],"8%":[203],"11%":[206],"13%":[210]},"counts_by_year":[{"year":2026,"cited_by_count":6},{"year":2025,"cited_by_count":12},{"year":2024,"cited_by_count":7}],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2025-10-10T00:00:00"}
