{"id":"https://openalex.org/W7081977695","doi":"https://doi.org/10.1109/access.2025.3609462","title":"A Novel Data Augmentation Framework for Arabic Multi-Label Text Classification Using AraBART, AraGPT2, and Borderline-SMOTE","display_name":"A Novel Data Augmentation Framework for Arabic Multi-Label Text Classification Using AraBART, AraGPT2, and Borderline-SMOTE","publication_year":2025,"publication_date":"2025-01-01","ids":{"openalex":"https://openalex.org/W7081977695","doi":"https://doi.org/10.1109/access.2025.3609462"},"language":"en","primary_location":{"id":"doi:10.1109/access.2025.3609462","is_oa":true,"landing_page_url":"https://doi.org/10.1109/access.2025.3609462","pdf_url":null,"source":{"id":"https://openalex.org/S2485537415","display_name":"IEEE Access","issn_l":"2169-3536","issn":["2169-3536"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Access","raw_type":"journal-article"},"type":"article","indexed_in":["crossref","doaj"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://doi.org/10.1109/access.2025.3609462","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":null,"display_name":"Samia F. Abdhood","orcid":"https://orcid.org/0009-0002-6907-916X"},"institutions":[{"id":"https://openalex.org/I885383172","display_name":"National University of Malaysia","ror":"https://ror.org/00bw8d226","country_code":"MY","type":"education","lineage":["https://openalex.org/I885383172"]}],"countries":["MY"],"is_corresponding":false,"raw_author_name":"Samia F. Abdhood","raw_affiliation_strings":["Center for Artificial Intelligence Technology, Faculty of Information Science and Technology, Universiti Kebangsaan Malaysia, Bangi, Selangor, Malaysia"],"raw_orcid":"https://orcid.org/0009-0002-6907-916X","affiliations":[{"raw_affiliation_string":"Center for Artificial Intelligence Technology, Faculty of Information Science and Technology, Universiti Kebangsaan Malaysia, Bangi, Selangor, Malaysia","institution_ids":["https://openalex.org/I885383172"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Nazlia Omar","orcid":"https://orcid.org/0000-0002-8173-8933"},"institutions":[{"id":"https://openalex.org/I885383172","display_name":"National University of Malaysia","ror":"https://ror.org/00bw8d226","country_code":"MY","type":"education","lineage":["https://openalex.org/I885383172"]}],"countries":["MY"],"is_corresponding":false,"raw_author_name":"Nazlia Omar","raw_affiliation_strings":["Center for Artificial Intelligence Technology, Faculty of Information Science and Technology, Universiti Kebangsaan Malaysia, Bangi, Selangor, Malaysia"],"raw_orcid":"https://orcid.org/0000-0002-8173-8933","affiliations":[{"raw_affiliation_string":"Center for Artificial Intelligence Technology, Faculty of Information Science and Technology, Universiti Kebangsaan Malaysia, Bangi, Selangor, Malaysia","institution_ids":["https://openalex.org/I885383172"]}]},{"author_position":"last","author":{"id":null,"display_name":"Sabrina Tiun","orcid":"https://orcid.org/0000-0002-1134-973X"},"institutions":[{"id":"https://openalex.org/I885383172","display_name":"National University of Malaysia","ror":"https://ror.org/00bw8d226","country_code":"MY","type":"education","lineage":["https://openalex.org/I885383172"]}],"countries":["MY"],"is_corresponding":false,"raw_author_name":"Sabrina Tiun","raw_affiliation_strings":["Center for Artificial Intelligence Technology, Faculty of Information Science and Technology, Universiti Kebangsaan Malaysia, Bangi, Selangor, Malaysia"],"raw_orcid":"https://orcid.org/0000-0002-1134-973X","affiliations":[{"raw_affiliation_string":"Center for Artificial Intelligence Technology, Faculty of Information Science and Technology, Universiti Kebangsaan Malaysia, Bangi, Selangor, Malaysia","institution_ids":["https://openalex.org/I885383172"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":{"value":1850,"currency":"USD","value_usd":1850},"apc_paid":{"value":1850,"currency":"USD","value_usd":1850},"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.5913712,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":"13","issue":null,"first_page":"169769","last_page":"169778"},"is_retracted":false,"is_paratext":false,"is_xpac":true,"primary_topic":{"id":"https://openalex.org/T12157","display_name":"Geochemistry and Geologic Mapping","score":0.6814000010490417,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T12157","display_name":"Geochemistry and Geologic Mapping","score":0.6814000010490417,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T13067","display_name":"Geological Modeling and Analysis","score":0.030300000682473183,"subfield":{"id":"https://openalex.org/subfields/1906","display_name":"Geochemistry and Petrology"},"field":{"id":"https://openalex.org/fields/19","display_name":"Earth and Planetary Sciences"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T14311","display_name":"Electrical and Electromagnetic Research","score":0.016300000250339508,"subfield":{"id":"https://openalex.org/subfields/3107","display_name":"Atomic and Molecular Physics, and Optics"},"field":{"id":"https://openalex.org/fields/31","display_name":"Physics and Astronomy"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/automatic-summarization","display_name":"Automatic summarization","score":0.6988999843597412},{"id":"https://openalex.org/keywords/overfitting","display_name":"Overfitting","score":0.6583999991416931},{"id":"https://openalex.org/keywords/undersampling","display_name":"Undersampling","score":0.6449000239372253},{"id":"https://openalex.org/keywords/oversampling","display_name":"Oversampling","score":0.6243000030517578},{"id":"https://openalex.org/keywords/transformer","display_name":"Transformer","score":0.5458999872207642},{"id":"https://openalex.org/keywords/random-forest","display_name":"Random forest","score":0.4528999924659729},{"id":"https://openalex.org/keywords/arabic","display_name":"Arabic","score":0.41749998927116394},{"id":"https://openalex.org/keywords/dropout","display_name":"Dropout (neural networks)","score":0.39320001006126404}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.857200026512146},{"id":"https://openalex.org/C170858558","wikidata":"https://www.wikidata.org/wiki/Q1394144","display_name":"Automatic summarization","level":2,"score":0.6988999843597412},{"id":"https://openalex.org/C22019652","wikidata":"https://www.wikidata.org/wiki/Q331309","display_name":"Overfitting","level":3,"score":0.6583999991416931},{"id":"https://openalex.org/C136536468","wikidata":"https://www.wikidata.org/wiki/Q1225894","display_name":"Undersampling","level":2,"score":0.6449000239372253},{"id":"https://openalex.org/C197323446","wikidata":"https://www.wikidata.org/wiki/Q331222","display_name":"Oversampling","level":3,"score":0.6243000030517578},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5999000072479248},{"id":"https://openalex.org/C66322947","wikidata":"https://www.wikidata.org/wiki/Q11658","display_name":"Transformer","level":3,"score":0.5458999872207642},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.4595000147819519},{"id":"https://openalex.org/C169258074","wikidata":"https://www.wikidata.org/wiki/Q245748","display_name":"Random forest","level":2,"score":0.4528999924659729},{"id":"https://openalex.org/C96455323","wikidata":"https://www.wikidata.org/wiki/Q13955","display_name":"Arabic","level":2,"score":0.41749998927116394},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.4106000065803528},{"id":"https://openalex.org/C2776145597","wikidata":"https://www.wikidata.org/wiki/Q25339462","display_name":"Dropout (neural networks)","level":2,"score":0.39320001006126404},{"id":"https://openalex.org/C2777212361","wikidata":"https://www.wikidata.org/wiki/Q5127848","display_name":"Class (philosophy)","level":2,"score":0.37929999828338623},{"id":"https://openalex.org/C184337299","wikidata":"https://www.wikidata.org/wiki/Q1437428","display_name":"Semantics (computer science)","level":2,"score":0.3400000035762787},{"id":"https://openalex.org/C52001869","wikidata":"https://www.wikidata.org/wiki/Q812530","display_name":"Naive Bayes classifier","level":3,"score":0.3172000050544739},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.3093999922275543},{"id":"https://openalex.org/C137293760","wikidata":"https://www.wikidata.org/wiki/Q3621696","display_name":"Language model","level":2,"score":0.29499998688697815},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.28279998898506165},{"id":"https://openalex.org/C67186912","wikidata":"https://www.wikidata.org/wiki/Q367664","display_name":"Data modeling","level":2,"score":0.27970001101493835},{"id":"https://openalex.org/C2780440489","wikidata":"https://www.wikidata.org/wiki/Q5227278","display_name":"Data-driven","level":2,"score":0.2709999978542328},{"id":"https://openalex.org/C110083411","wikidata":"https://www.wikidata.org/wiki/Q1744628","display_name":"Statistical classification","level":2,"score":0.25200000405311584}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1109/access.2025.3609462","is_oa":true,"landing_page_url":"https://doi.org/10.1109/access.2025.3609462","pdf_url":null,"source":{"id":"https://openalex.org/S2485537415","display_name":"IEEE Access","issn_l":"2169-3536","issn":["2169-3536"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Access","raw_type":"journal-article"},{"id":"pmh:oai:doaj.org/article:f9acf1c6b24c4a0595302567abbf72ae","is_oa":true,"landing_page_url":"https://doaj.org/article/f9acf1c6b24c4a0595302567abbf72ae","pdf_url":null,"source":{"id":"https://openalex.org/S4306401280","display_name":"DOAJ (DOAJ: Directory of Open Access Journals)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by-sa","license_id":"https://openalex.org/licenses/cc-by-sa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"IEEE Access, Vol 13, Pp 169769-169778 (2025)","raw_type":"article"}],"best_oa_location":{"id":"doi:10.1109/access.2025.3609462","is_oa":true,"landing_page_url":"https://doi.org/10.1109/access.2025.3609462","pdf_url":null,"source":{"id":"https://openalex.org/S2485537415","display_name":"IEEE Access","issn_l":"2169-3536","issn":["2169-3536"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Access","raw_type":"journal-article"},"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/4","score":0.48896870017051697,"display_name":"Quality Education"}],"awards":[{"id":"https://openalex.org/G1173550986","display_name":null,"funder_award_id":"TAP-K007009","funder_id":"https://openalex.org/F4320322699","funder_display_name":"Universiti Kebangsaan Malaysia"}],"funders":[{"id":"https://openalex.org/F4320322699","display_name":"Universiti Kebangsaan Malaysia","ror":"https://ror.org/00bw8d226"}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":38,"referenced_works":["https://openalex.org/W2896155461","https://openalex.org/W2971296908","https://openalex.org/W2971875451","https://openalex.org/W2974335209","https://openalex.org/W3019087140","https://openalex.org/W3032398227","https://openalex.org/W3034999214","https://openalex.org/W3041651500","https://openalex.org/W3110065779","https://openalex.org/W3148105069","https://openalex.org/W3155368131","https://openalex.org/W3158777740","https://openalex.org/W3175414601","https://openalex.org/W3176044494","https://openalex.org/W3181034584","https://openalex.org/W3201915713","https://openalex.org/W4213116657","https://openalex.org/W4225948714","https://openalex.org/W4308690713","https://openalex.org/W4310687019","https://openalex.org/W4310908168","https://openalex.org/W4312221408","https://openalex.org/W4316658241","https://openalex.org/W4316663276","https://openalex.org/W4327644597","https://openalex.org/W4366373892","https://openalex.org/W4385573810","https://openalex.org/W4388936673","https://openalex.org/W4392627015","https://openalex.org/W4399171839","https://openalex.org/W4400127166","https://openalex.org/W4400871226","https://openalex.org/W4401288511","https://openalex.org/W4401342802","https://openalex.org/W4402968143","https://openalex.org/W4403920147","https://openalex.org/W4408274850","https://openalex.org/W4410526967"],"related_works":[],"abstract_inverted_index":{"Data":[0],"Augmentation":[1],"(DA)":[2],"techniques":[3,68],"present":[4],"solutions":[5,20],"for":[6,21,93],"Natural":[7],"Language":[8],"Processing":[9],"(NLP)":[10],"to":[11,39,42,47,52,87,97,126,138,154,163,181,223,243],"address":[12,98],"class":[13,22,207],"imbalance":[14,208],"and":[15,80,121,141,144,214,230],"data":[16,130,145,212],"scarcity.":[17],"The":[18,108,197],"current":[19],"imbalance,":[23],"either":[24,71],"random":[25,28],"oversampling":[26,37],"or":[27,61,73],"undersampling":[29],"techniques,":[30,57],"suffer":[31],"from":[32,237],"several":[33],"issues.":[34],"For":[35],"instance,":[36],"leads":[38,46],"overfitting":[40],"due":[41,51],"replication,":[43],"whilst":[44],"under-sampling":[45],"loss":[48],"of":[49,112,167,185],"information":[50],"removals.":[53],"Meanwhile,":[54],"traditional":[55],"DA":[56,91],"including":[58],"paraphrasing,":[59],"rule-based,":[60],"noising":[62],"approaches,":[63],"require":[64],"strong":[65],"lexicons.":[66],"These":[67],"are":[69],"also":[70],"time-consuming":[72],"introduce":[74,127],"noise,":[75],"resulting":[76],"in":[77,102,259],"incorrect":[78],"syntactical":[79],"semantic":[81],"contexts.":[82],"Hence,":[83],"this":[84,248],"paper":[85],"aims":[86],"propose":[88],"a":[89,172,191],"novel":[90,265],"framework":[92,110,203,254],"Arabic":[94,103,119,133,193,260],"news":[95,195],"articles":[96],"the":[99,118,165,168,183,186,201,206,224,234,240,252,257],"prevailing":[100],"challenges":[101],"Multi-Label":[104,217],"Text":[105,218],"Classification":[106,219],"(AMLTC).":[107],"proposed":[109,202,253],"consists":[111],"three":[113],"phases:":[114],"abstractive":[115],"summarization":[116],"using":[117,147],"Bidirectional":[120,173],"Auto-Regressive":[122],"Transformer":[123,136],"(AraBART)":[124],"model":[125,178],"new":[128],"features,":[129],"generation":[131],"with":[132],"Generative":[134],"Pre-trained":[135],"(AraGPT2)":[137],"create":[139],"diverse":[140],"contextual":[142],"texts,":[143],"balancing":[146],"borderline":[148],"Synthetic":[149],"Minority":[150],"Over-Sampling":[151],"Technique":[152],"(BorderlineSMOTE)":[153],"achieve":[155],"an":[156],"optimal":[157],"balance.":[158],"Each":[159],"phase":[160],"was":[161,179],"evaluated":[162],"ensure":[164],"quality":[166],"augmented":[169,187,266],"data.":[170,267],"Furthermore,":[171],"Long":[174],"Short-Term":[175],"Memory":[176],"(BiLSTM)":[177],"used":[180],"assess":[182],"performance":[184,221],"dataset":[188,242],"(augDS)":[189],"on":[190,239],"multi-label":[192],"RTN":[194],"dataset.":[196],"results":[198],"demonstrated":[199],"that":[200,251],"effectively":[204],"addressed":[205],"problem":[209],"by":[210,228,262],"preserving":[211],"integrity":[213],"significantly":[215],"improving":[216],"(MLTC)":[220],"compared":[222],"non-augDS,":[225],"as":[226],"confirmed":[227],"independent":[229],"paired":[231],"t-tests.":[232],"Specifically,":[233],"F1-score":[235],"increased":[236],"0.54":[238],"original":[241],"0.90":[244],"after":[245],"augmentation.":[246],"Overall,":[247],"study":[249],"demonstrates":[250],"successfully":[255],"addresses":[256],"issues":[258],"datasets":[261],"generating":[263],"diverse,":[264],"Additionally,":[268],"it":[269],"enhanced":[270],"MLTC":[271],"performance,":[272],"showcasing":[273],"its":[274],"effectiveness.":[275]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2025-10-10T00:00:00"}
