{"id":"https://openalex.org/W7125833199","doi":"https://doi.org/10.1109/icit64950.2025.11049195","title":"Addressing Class Imbalance in Arabic Medical Questions using Ensembles and SMOTE","display_name":"Addressing Class Imbalance in Arabic Medical Questions using Ensembles and SMOTE","publication_year":2025,"publication_date":"2025-05-27","ids":{"openalex":"https://openalex.org/W7125833199","doi":"https://doi.org/10.1109/icit64950.2025.11049195"},"language":null,"primary_location":{"id":"doi:10.1109/icit64950.2025.11049195","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icit64950.2025.11049195","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 12th International Conference on Information Technology (ICIT)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5123882994","display_name":"Bushra Al-Smadi","orcid":null},"institutions":[{"id":"https://openalex.org/I114972647","display_name":"University of Jordan","ror":"https://ror.org/05k89ew48","country_code":"JO","type":"education","lineage":["https://openalex.org/I114972647"]}],"countries":["JO"],"is_corresponding":false,"raw_author_name":"Bushra Al-Smadi","raw_affiliation_strings":["The University of Jordan,King Abdullah II School of Information Technology,Amman,Jordan"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"The University of Jordan,King Abdullah II School of Information Technology,Amman,Jordan","institution_ids":["https://openalex.org/I114972647"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5124027612","display_name":"Bassam H. Hammo","orcid":null},"institutions":[{"id":"https://openalex.org/I114972647","display_name":"University of Jordan","ror":"https://ror.org/05k89ew48","country_code":"JO","type":"education","lineage":["https://openalex.org/I114972647"]}],"countries":["JO"],"is_corresponding":false,"raw_author_name":"Bassam H. Hammo","raw_affiliation_strings":["The University of Jordan,King Abdullah II School of Information Technology,Amman,Jordan"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"The University of Jordan,King Abdullah II School of Information Technology,Amman,Jordan","institution_ids":["https://openalex.org/I114972647"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5048560390","display_name":"Hossam Faris","orcid":"https://orcid.org/0000-0003-4261-8127"},"institutions":[{"id":"https://openalex.org/I114972647","display_name":"University of Jordan","ror":"https://ror.org/05k89ew48","country_code":"JO","type":"education","lineage":["https://openalex.org/I114972647"]}],"countries":["JO"],"is_corresponding":false,"raw_author_name":"Hossam Faris","raw_affiliation_strings":["The University of Jordan,King Abdullah II School of Information Technology,Amman,Jordan"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"The University of Jordan,King Abdullah II School of Information Technology,Amman,Jordan","institution_ids":["https://openalex.org/I114972647"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.80354787,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"297","last_page":"303"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.2651999890804291,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.2651999890804291,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11652","display_name":"Imbalanced Data Classification Techniques","score":0.23659999668598175,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T13702","display_name":"Machine Learning in Healthcare","score":0.09809999912977219,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/boosting","display_name":"Boosting (machine learning)","score":0.8687000274658203},{"id":"https://openalex.org/keywords/adaboost","display_name":"AdaBoost","score":0.8378999829292297},{"id":"https://openalex.org/keywords/oversampling","display_name":"Oversampling","score":0.6309999823570251},{"id":"https://openalex.org/keywords/random-forest","display_name":"Random forest","score":0.588699996471405},{"id":"https://openalex.org/keywords/decision-tree","display_name":"Decision tree","score":0.5230000019073486},{"id":"https://openalex.org/keywords/ensemble-learning","display_name":"Ensemble learning","score":0.47909998893737793},{"id":"https://openalex.org/keywords/class","display_name":"Class (philosophy)","score":0.4648999869823456},{"id":"https://openalex.org/keywords/logistic-regression","display_name":"Logistic regression","score":0.38359999656677246}],"concepts":[{"id":"https://openalex.org/C46686674","wikidata":"https://www.wikidata.org/wiki/Q466303","display_name":"Boosting (machine learning)","level":2,"score":0.8687000274658203},{"id":"https://openalex.org/C141404830","wikidata":"https://www.wikidata.org/wiki/Q2823869","display_name":"AdaBoost","level":3,"score":0.8378999829292297},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.6976000070571899},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.694100022315979},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6495000123977661},{"id":"https://openalex.org/C197323446","wikidata":"https://www.wikidata.org/wiki/Q331222","display_name":"Oversampling","level":3,"score":0.6309999823570251},{"id":"https://openalex.org/C169258074","wikidata":"https://www.wikidata.org/wiki/Q245748","display_name":"Random forest","level":2,"score":0.588699996471405},{"id":"https://openalex.org/C84525736","wikidata":"https://www.wikidata.org/wiki/Q831366","display_name":"Decision tree","level":2,"score":0.5230000019073486},{"id":"https://openalex.org/C45942800","wikidata":"https://www.wikidata.org/wiki/Q245652","display_name":"Ensemble learning","level":2,"score":0.47909998893737793},{"id":"https://openalex.org/C2777212361","wikidata":"https://www.wikidata.org/wiki/Q5127848","display_name":"Class (philosophy)","level":2,"score":0.4648999869823456},{"id":"https://openalex.org/C151956035","wikidata":"https://www.wikidata.org/wiki/Q1132755","display_name":"Logistic regression","level":2,"score":0.38359999656677246},{"id":"https://openalex.org/C12267149","wikidata":"https://www.wikidata.org/wiki/Q282453","display_name":"Support vector machine","level":2,"score":0.36880001425743103},{"id":"https://openalex.org/C110083411","wikidata":"https://www.wikidata.org/wiki/Q1744628","display_name":"Statistical classification","level":2,"score":0.3628000020980835},{"id":"https://openalex.org/C162040801","wikidata":"https://www.wikidata.org/wiki/Q799897","display_name":"Bootstrap aggregating","level":2,"score":0.35580000281333923},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.33799999952316284},{"id":"https://openalex.org/C168167062","wikidata":"https://www.wikidata.org/wiki/Q1117970","display_name":"Component (thermodynamics)","level":2,"score":0.29159998893737793},{"id":"https://openalex.org/C2779891985","wikidata":"https://www.wikidata.org/wiki/Q46994","display_name":"Telemedicine","level":3,"score":0.28029999136924744},{"id":"https://openalex.org/C96455323","wikidata":"https://www.wikidata.org/wiki/Q13955","display_name":"Arabic","level":2,"score":0.26489999890327454},{"id":"https://openalex.org/C95623464","wikidata":"https://www.wikidata.org/wiki/Q1096149","display_name":"Classifier (UML)","level":2,"score":0.25200000405311584},{"id":"https://openalex.org/C148524875","wikidata":"https://www.wikidata.org/wiki/Q6975395","display_name":"F1 score","level":2,"score":0.25099998712539673}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/icit64950.2025.11049195","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icit64950.2025.11049195","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 12th International Conference on Information Technology (ICIT)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"The":[0],"rise":[1],"of":[2,20,57,93,133,177,201,222],"telemedicine":[3,214],"platforms":[4,22,215],"like":[5],"Altibbi":[6,98],"has":[7],"transformed":[8],"healthcare":[9],"access,":[10],"enabling":[11],"patients":[12,226],"to":[13,35,48,69,129,187],"consult":[14],"specialists":[15],"remotely.":[16],"One":[17],"vital":[18,199],"component":[19],"these":[21,40],"is":[23],"medical":[24,95,223],"question":[25],"classification,":[26],"which":[27],"ensures":[28],"that":[29,152],"patient":[30],"inquiries":[31],"are":[32],"efficiently":[33],"directed":[34],"the":[36,55,131,141,159,198,219,233],"appropriate":[37],"specialists.":[38,235],"However,":[39],"datasets":[41],"often":[42],"suffer":[43],"from":[44,97,172,185,232],"class":[45],"imbalance,":[46],"leading":[47],"biased":[49],"model":[50],"performance.":[51],"This":[52,126],"study":[53],"explores":[54],"use":[56],"AraBERTv0.2-Twitter":[58],"embeddings":[59],"combined":[60,190],"with":[61,113,124,154,168,179,191],"ensemble":[62,202],"learning":[63],"techniques,":[64],"Bagging":[65,104,112,153,192],"and":[66,82,121,139,164,193,208,229],"Boosting":[67,117],"applied":[68],"various":[70],"classifiers,":[71],"including":[72],"Logistic":[73],"Regression":[74],"(LR),":[75],"Random":[76],"Forests":[77],"(RF),":[78],"Decision":[79],"Trees":[80],"(DT),":[81],"K-Nearest":[83],"Neighbors":[84],"(KNN).":[85],"We":[86],"evaluate":[87],"their":[88],"performance":[89,138],"on":[90,136],"a":[91,175],"dataset":[92],"Arabic":[94],"questions":[96],"under":[99],"four":[100],"experimental":[101],"conditions:":[102],"(1)":[103],"without":[105,119,156],"Synthetic":[106],"Minority":[107],"Over-sampling":[108],"Technique":[109],"(SMOTE),":[110],"(2)":[111],"SMOTE,":[114,120,157],"(3)":[115],"Adaptive":[116],"(AdaBoost)":[118],"(4)":[122],"AdaBoost":[123,167],"SMOTE.":[125,180,194],"allows":[127],"us":[128],"analyze":[130],"impact":[132],"synthetic":[134],"oversampling":[135],"classification":[137,206,221],"identify":[140],"most":[142],"effective":[143],"approach":[144],"for":[145],"addressing":[146],"data":[147],"imbalance.":[148],"Experimental":[149],"results":[150],"show":[151],"LR,":[155],"achieves":[158],"highest":[160],"Macro":[161],"F1-score":[162],"(0.8468)":[163],"G-Mean":[165,176,184],"(0.9077).":[166],"LR":[169],"benefits":[170],"significantly":[171,217],"oversampling,":[173],"reaching":[174],"(0.8921)":[178],"Additionally,":[181],"RF":[182],"improves":[183],"(0.8831)":[186],"(0.8893)":[188],"when":[189],"These":[195],"findings":[196],"highlight":[197],"role":[200],"methods":[203],"in":[204],"enhancing":[205],"fairness":[207],"accuracy.":[209],"By":[210],"integrating":[211],"such":[212],"strategies,":[213],"can":[216],"improve":[218],"automated":[220],"questions,":[224],"ensuring":[225],"receive":[227],"timely":[228],"accurate":[230],"responses":[231],"right":[234]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2026-01-28T00:00:00"}
