{"id":"https://openalex.org/W4399734362","doi":"https://doi.org/10.1186/s40537-024-00943-4","title":"Data oversampling and imbalanced datasets: an investigation of performance for machine learning and feature engineering","display_name":"Data oversampling and imbalanced datasets: an investigation of performance for machine learning and feature engineering","publication_year":2024,"publication_date":"2024-06-17","ids":{"openalex":"https://openalex.org/W4399734362","doi":"https://doi.org/10.1186/s40537-024-00943-4"},"language":"en","primary_location":{"id":"doi:10.1186/s40537-024-00943-4","is_oa":true,"landing_page_url":"https://doi.org/10.1186/s40537-024-00943-4","pdf_url":"https://journalofbigdata.springeropen.com/counter/pdf/10.1186/s40537-024-00943-4","source":{"id":"https://openalex.org/S2737955091","display_name":"Journal Of Big Data","issn_l":"2196-1115","issn":["2196-1115"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310319900","host_organization_name":"Springer Science+Business Media","host_organization_lineage":["https://openalex.org/P4310319900","https://openalex.org/P4310319965"],"host_organization_lineage_names":["Springer Science+Business Media","Springer Nature"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Journal of Big Data","raw_type":"journal-article"},"type":"article","indexed_in":["crossref","doaj"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://journalofbigdata.springeropen.com/counter/pdf/10.1186/s40537-024-00943-4","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5047043390","display_name":"Muhammad Mujahid","orcid":"https://orcid.org/0009-0005-5751-5528"},"institutions":[{"id":"https://openalex.org/I142024983","display_name":"Prince Sultan University","ror":"https://ror.org/053mqrf26","country_code":"SA","type":"education","lineage":["https://openalex.org/I142024983"]}],"countries":["SA"],"is_corresponding":false,"raw_author_name":"Muhammad Mujahid","raw_affiliation_strings":["Artificial Intelligence and Data Analytics (AIDA) Lab, CCIS, Prince Sultan University, Riyadh, 11586, Saudi Arabia"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Artificial Intelligence and Data Analytics (AIDA) Lab, CCIS, Prince Sultan University, Riyadh, 11586, Saudi Arabia","institution_ids":["https://openalex.org/I142024983"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5026215443","display_name":"Erol K\u0131na","orcid":"https://orcid.org/0000-0002-7785-646X"},"institutions":[{"id":"https://openalex.org/I118567650","display_name":"Van Y\u00fcz\u00fcnc\u00fc Y\u0131l \u00dcniversitesi","ror":"https://ror.org/041jyzp61","country_code":"TR","type":"education","lineage":["https://openalex.org/I118567650"]}],"countries":["TR"],"is_corresponding":false,"raw_author_name":"EROL K\u0131na","raw_affiliation_strings":["\u00d6zalp Vocational School, Van Y\u00fcz\u00fcnc\u00fc Yil University, Van, 65100, Turkey"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"\u00d6zalp Vocational School, Van Y\u00fcz\u00fcnc\u00fc Yil University, Van, 65100, Turkey","institution_ids":["https://openalex.org/I118567650"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5058941449","display_name":"Furqan Rustam","orcid":"https://orcid.org/0000-0001-8403-1047"},"institutions":[{"id":"https://openalex.org/I100930933","display_name":"University College Dublin","ror":"https://ror.org/05m7pjf47","country_code":"IE","type":"education","lineage":["https://openalex.org/I100930933"]}],"countries":["IE"],"is_corresponding":false,"raw_author_name":"Furqan Rustam","raw_affiliation_strings":["School of Computer Science, University College Dublin, Dublin, D04 V1W8, Ireland"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"School of Computer Science, University College Dublin, Dublin, D04 V1W8, Ireland","institution_ids":["https://openalex.org/I100930933"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5061049695","display_name":"M\u00f3nica Gracia Villar","orcid":"https://orcid.org/0000-0002-8547-9246"},"institutions":[{"id":"https://openalex.org/I4210097234","display_name":"Universidad Internacional","ror":"https://ror.org/00cxp1a86","country_code":"MX","type":"education","lineage":["https://openalex.org/I4210097234"]},{"id":"https://openalex.org/I4210128504","display_name":"Universidad Europea del Atl\u00e1ntico","ror":"https://ror.org/048tesw25","country_code":"ES","type":"education","lineage":["https://openalex.org/I4210128504"]},{"id":"https://openalex.org/I4210157569","display_name":"Centro Universit\u00e1rio Internacional","ror":"https://ror.org/04j9vr008","country_code":"MX","type":"education","lineage":["https://openalex.org/I4210157569"]}],"countries":["ES","MX"],"is_corresponding":false,"raw_author_name":"Monica Gracia Villar","raw_affiliation_strings":["Universidad Europea del Atlantico, Isabel Torres 21, Santander, 39011, Spain","Universidad Internacional Iberoamericana Arecibo, Puerto Rico, 00613, USA","Universidade Internacional do Cuanza, Bie, Angola"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Universidad Europea del Atlantico, Isabel Torres 21, Santander, 39011, Spain","institution_ids":["https://openalex.org/I4210128504"]},{"raw_affiliation_string":"Universidad Internacional Iberoamericana Arecibo, Puerto Rico, 00613, USA","institution_ids":["https://openalex.org/I4210097234"]},{"raw_affiliation_string":"Universidade Internacional do Cuanza, Bie, Angola","institution_ids":["https://openalex.org/I4210157569"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5061682362","display_name":"Eduardo Silva Alvarado","orcid":"https://orcid.org/0000-0003-4011-9391"},"institutions":[{"id":"https://openalex.org/I205571821","display_name":"Ibero American University","ror":"https://ror.org/05vss7635","country_code":"MX","type":"education","lineage":["https://openalex.org/I205571821"]},{"id":"https://openalex.org/I4210097234","display_name":"Universidad Internacional","ror":"https://ror.org/00cxp1a86","country_code":"MX","type":"education","lineage":["https://openalex.org/I4210097234"]},{"id":"https://openalex.org/I4210128504","display_name":"Universidad Europea del Atl\u00e1ntico","ror":"https://ror.org/048tesw25","country_code":"ES","type":"education","lineage":["https://openalex.org/I4210128504"]},{"id":"https://openalex.org/I4391767878","display_name":"Universidad de la Romana","ror":"https://ror.org/051sm7d31","country_code":null,"type":"education","lineage":["https://openalex.org/I4391767878"]},{"id":"https://openalex.org/I87806610","display_name":"Ibero-American University Puebla","ror":"https://ror.org/00pcv0g02","country_code":"MX","type":"education","lineage":["https://openalex.org/I87806610"]}],"countries":["ES","MX"],"is_corresponding":false,"raw_author_name":"Eduardo Silva Alvarado","raw_affiliation_strings":["Universidad Europea del Atlantico, Isabel Torres 21, Santander, 39011, Spain","Universidad Internacional Iberoamericana, Campeche, 24560, Mexico","Universidad de La Romana, La Romana, Dominican Republic"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Universidad Europea del Atlantico, Isabel Torres 21, Santander, 39011, Spain","institution_ids":["https://openalex.org/I4210128504"]},{"raw_affiliation_string":"Universidad Internacional Iberoamericana, Campeche, 24560, Mexico","institution_ids":["https://openalex.org/I4210097234","https://openalex.org/I87806610","https://openalex.org/I205571821"]},{"raw_affiliation_string":"Universidad de La Romana, La Romana, Dominican Republic","institution_ids":["https://openalex.org/I4391767878"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5046258353","display_name":"Isabel de la Torre D\u00edez","orcid":"https://orcid.org/0000-0003-3134-7720"},"institutions":[{"id":"https://openalex.org/I108103353","display_name":"Universidad de Valladolid","ror":"https://ror.org/01fvbaw18","country_code":"ES","type":"education","lineage":["https://openalex.org/I108103353"]}],"countries":["ES"],"is_corresponding":false,"raw_author_name":"Isabel De La Torre Diez","raw_affiliation_strings":["Department of Signal Theory and Communications and Telematic Engineering, University of Valladolid, Paseo de Belen 15, Valladolid, 47011, Spain"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Department of Signal Theory and Communications and Telematic Engineering, University of Valladolid, Paseo de Belen 15, Valladolid, 47011, Spain","institution_ids":["https://openalex.org/I108103353"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5074629800","display_name":"Imran Ashraf","orcid":"https://orcid.org/0000-0002-8271-6496"},"institutions":[{"id":"https://openalex.org/I55240360","display_name":"Yeungnam University","ror":"https://ror.org/05yc6p159","country_code":"KR","type":"education","lineage":["https://openalex.org/I55240360"]}],"countries":["KR"],"is_corresponding":true,"raw_author_name":"Imran Ashraf","raw_affiliation_strings":["Department of Information and Communication Engineering, Yeungnam University, Gyeongsan, 38541, Republic of Korea"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Department of Information and Communication Engineering, Yeungnam University, Gyeongsan, 38541, Republic of Korea","institution_ids":["https://openalex.org/I55240360"]}]}],"institutions":[],"countries_distinct_count":6,"institutions_distinct_count":7,"corresponding_author_ids":["https://openalex.org/A5074629800"],"corresponding_institution_ids":["https://openalex.org/I55240360"],"apc_list":{"value":1060,"currency":"GBP","value_usd":1300},"apc_paid":{"value":1060,"currency":"GBP","value_usd":1300},"fwci":44.6857,"has_fulltext":true,"cited_by_count":142,"citation_normalized_percentile":{"value":0.99886721,"is_in_top_1_percent":true,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":99,"max":100},"biblio":{"volume":"11","issue":"1","first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11652","display_name":"Imbalanced Data Classification Techniques","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11652","display_name":"Imbalanced Data Classification Techniques","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11512","display_name":"Anomaly Detection Techniques and Applications","score":0.9980999827384949,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12535","display_name":"Machine Learning and Data Classification","score":0.9973000288009644,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8383530378341675},{"id":"https://openalex.org/keywords/oversampling","display_name":"Oversampling","score":0.8352810144424438},{"id":"https://openalex.org/keywords/computational-science-and-engineering","display_name":"Computational Science and Engineering","score":0.8107978105545044},{"id":"https://openalex.org/keywords/feature","display_name":"Feature (linguistics)","score":0.678119421005249},{"id":"https://openalex.org/keywords/feature-engineering","display_name":"Feature engineering","score":0.6748011708259583},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.592990517616272},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5647469758987427},{"id":"https://openalex.org/keywords/science-and-engineering","display_name":"Science and engineering","score":0.4103671610355377},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.396648645401001},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.357136070728302},{"id":"https://openalex.org/keywords/data-science","display_name":"Data science","score":0.3291316032409668},{"id":"https://openalex.org/keywords/deep-learning","display_name":"Deep learning","score":0.10982632637023926}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8383530378341675},{"id":"https://openalex.org/C197323446","wikidata":"https://www.wikidata.org/wiki/Q331222","display_name":"Oversampling","level":3,"score":0.8352810144424438},{"id":"https://openalex.org/C68597687","wikidata":"https://www.wikidata.org/wiki/Q362601","display_name":"Computational Science and Engineering","level":2,"score":0.8107978105545044},{"id":"https://openalex.org/C2776401178","wikidata":"https://www.wikidata.org/wiki/Q12050496","display_name":"Feature (linguistics)","level":2,"score":0.678119421005249},{"id":"https://openalex.org/C2778827112","wikidata":"https://www.wikidata.org/wiki/Q22245680","display_name":"Feature engineering","level":3,"score":0.6748011708259583},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.592990517616272},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5647469758987427},{"id":"https://openalex.org/C2993955422","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Science and engineering","level":2,"score":0.4103671610355377},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.396648645401001},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.357136070728302},{"id":"https://openalex.org/C2522767166","wikidata":"https://www.wikidata.org/wiki/Q2374463","display_name":"Data science","level":1,"score":0.3291316032409668},{"id":"https://openalex.org/C108583219","wikidata":"https://www.wikidata.org/wiki/Q197536","display_name":"Deep learning","level":2,"score":0.10982632637023926},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.0},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.0},{"id":"https://openalex.org/C31258907","wikidata":"https://www.wikidata.org/wiki/Q1301371","display_name":"Computer network","level":1,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C2776257435","wikidata":"https://www.wikidata.org/wiki/Q1576430","display_name":"Bandwidth (computing)","level":2,"score":0.0},{"id":"https://openalex.org/C55587333","wikidata":"https://www.wikidata.org/wiki/Q1133029","display_name":"Engineering ethics","level":1,"score":0.0}],"mesh":[],"locations_count":3,"locations":[{"id":"doi:10.1186/s40537-024-00943-4","is_oa":true,"landing_page_url":"https://doi.org/10.1186/s40537-024-00943-4","pdf_url":"https://journalofbigdata.springeropen.com/counter/pdf/10.1186/s40537-024-00943-4","source":{"id":"https://openalex.org/S2737955091","display_name":"Journal Of Big Data","issn_l":"2196-1115","issn":["2196-1115"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310319900","host_organization_name":"Springer Science+Business Media","host_organization_lineage":["https://openalex.org/P4310319900","https://openalex.org/P4310319965"],"host_organization_lineage_names":["Springer Science+Business Media","Springer Nature"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Journal of Big Data","raw_type":"journal-article"},{"id":"pmh:661ea2a6-93fc-4c62-87fc-d23fa62ebed4","is_oa":true,"landing_page_url":"https://avesis.yyu.edu.tr/publication/details/661ea2a6-93fc-4c62-87fc-d23fa62ebed4/oai","pdf_url":"https://avesis.yyu.edu.tr/publication/details/661ea2a6-93fc-4c62-87fc-d23fa62ebed4/oai/document.pdf","source":{"id":"https://openalex.org/S7407055328","display_name":"Van Y\u00fcz\u00fcnc\u00fc Y\u0131l University Academic Data Management System","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by-nc-nd","license_id":"https://openalex.org/licenses/cc-by-nc-nd","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"info:eu-repo/semantics/article"},{"id":"pmh:oai:doaj.org/article:ebc193eee4c148639cb2c008dd48baf6","is_oa":false,"landing_page_url":"https://doaj.org/article/ebc193eee4c148639cb2c008dd48baf6","pdf_url":null,"source":{"id":"https://openalex.org/S4306401280","display_name":"DOAJ (DOAJ: Directory of Open Access Journals)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Journal of Big Data, Vol 11, Iss 1, Pp 1-32 (2024)","raw_type":"article"}],"best_oa_location":{"id":"doi:10.1186/s40537-024-00943-4","is_oa":true,"landing_page_url":"https://doi.org/10.1186/s40537-024-00943-4","pdf_url":"https://journalofbigdata.springeropen.com/counter/pdf/10.1186/s40537-024-00943-4","source":{"id":"https://openalex.org/S2737955091","display_name":"Journal Of Big Data","issn_l":"2196-1115","issn":["2196-1115"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310319900","host_organization_name":"Springer Science+Business Media","host_organization_lineage":["https://openalex.org/P4310319900","https://openalex.org/P4310319965"],"host_organization_lineage_names":["Springer Science+Business Media","Springer Nature"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Journal of Big Data","raw_type":"journal-article"},"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G7107681410","display_name":null,"funder_award_id":"11586","funder_id":"https://openalex.org/F4320327702","funder_display_name":"Prince Sultan University"}],"funders":[{"id":"https://openalex.org/F4320327702","display_name":"Prince Sultan University","ror":"https://ror.org/053mqrf26"}],"has_content":{"pdf":true,"grobid_xml":false},"content_urls":{"pdf":"https://content.openalex.org/works/W4399734362.pdf"},"referenced_works_count":50,"referenced_works":["https://openalex.org/W1513874326","https://openalex.org/W1522930027","https://openalex.org/W1941659294","https://openalex.org/W1980867644","https://openalex.org/W1981976602","https://openalex.org/W1996216253","https://openalex.org/W1999318832","https://openalex.org/W2023450550","https://openalex.org/W2055130908","https://openalex.org/W2148143831","https://openalex.org/W2488148959","https://openalex.org/W2626590196","https://openalex.org/W2766296277","https://openalex.org/W2779564163","https://openalex.org/W2800516023","https://openalex.org/W2804019762","https://openalex.org/W2836242602","https://openalex.org/W2886346335","https://openalex.org/W2907225684","https://openalex.org/W2953618482","https://openalex.org/W2990694629","https://openalex.org/W3036069605","https://openalex.org/W3039551013","https://openalex.org/W3083850392","https://openalex.org/W3104879322","https://openalex.org/W3112644798","https://openalex.org/W3136003884","https://openalex.org/W3156338075","https://openalex.org/W3159629268","https://openalex.org/W3160046514","https://openalex.org/W3164947024","https://openalex.org/W3199146084","https://openalex.org/W3205788768","https://openalex.org/W4205928658","https://openalex.org/W4206646040","https://openalex.org/W4207039276","https://openalex.org/W4214848113","https://openalex.org/W4230285502","https://openalex.org/W4240980770","https://openalex.org/W4247922705","https://openalex.org/W4247993926","https://openalex.org/W4252665217","https://openalex.org/W4312905724","https://openalex.org/W4367146937","https://openalex.org/W4368362619","https://openalex.org/W4383892345","https://openalex.org/W4386266758","https://openalex.org/W4391472065","https://openalex.org/W4400870032","https://openalex.org/W6631307659"],"related_works":["https://openalex.org/W2766503024","https://openalex.org/W2781247653","https://openalex.org/W2750075801","https://openalex.org/W3164948662","https://openalex.org/W3153597579","https://openalex.org/W4385336128","https://openalex.org/W4394398790","https://openalex.org/W4399455186","https://openalex.org/W4239561299","https://openalex.org/W4382766289"],"abstract_inverted_index":{"Abstract":[0],"The":[1,16,209,262],"classification":[2],"of":[3,18,33,43,50,89,96,105,128,158,187,244],"imbalanced":[4,51,90,149],"datasets":[5,91,151,251],"is":[6,23],"a":[7,30,39,48,274],"prominent":[8],"task":[9],"in":[10,20,54],"text":[11],"mining":[12],"and":[13,80,92,113,130,179,190,193,214,241,246,252],"machine":[14,73,97,163],"learning.":[15],"number":[17,32],"samples":[19,34],"each":[21],"class":[22,28],"not":[24],"uniformly":[25],"distributed;":[26],"one":[27],"contains":[29],"large":[31],"while":[35],"the":[36,44,87,94,103,118,126,134,140,156,185,207,238],"other":[37,220,287],"has":[38],"small":[40],"number.":[41],"Overfitting":[42],"model":[45,264],"occurs":[46],"as":[47,284],"result":[49],"datasets,":[52],"resulting":[53],"poor":[55],"performance.":[56],"In":[57,183],"this":[58],"study,":[59],"we":[60],"compare":[61,155],"different":[62],"oversampling":[63,68,84,159],"techniques":[64,160,221],"like":[65],"synthetic":[66,82],"minority":[67],"technique":[69],"(SMOTE),":[70],"support":[71],"vector":[72],"SMOTE":[74,213,256],"(SVM-SMOTE),":[75],"Border-line":[76],"SMOTE,":[77,79],"K-means":[78],"adaptive":[81],"(ADASYN)":[83],"to":[85,120,154,203,236,286],"address":[86],"issue":[88],"enhance":[93],"performance":[95,157],"learning":[98,164],"models.":[99],"Preprocessing":[100],"significantly":[101],"enhances":[102],"quality":[104],"input":[106],"data":[107,141],"by":[108],"reducing":[109],"noise,":[110],"redundant":[111],"data,":[112],"unnecessary":[114],"data.":[115,136],"This":[116,137],"enables":[117],"machines":[119],"identify":[121],"crucial":[122],"patterns":[123],"that":[124,212,230],"facilitate":[125],"extraction":[127,199],"significant":[129],"pertinent":[131],"information":[132],"from":[133,206],"preprocessed":[135],"study":[138],"preprocesses":[139],"using":[142,265],"various":[143],"top-level":[144],"preprocessing":[145],"steps.":[146],"Furthermore,":[147],"two":[148],"Twitter":[150],"are":[152,201],"used":[153,202],"with":[161,232,259,273],"six":[162],"models":[165],"including":[166],"random":[167],"forest":[168],"(RF),":[169],"SVM,":[170],"K-nearest":[171],"neighbor":[172],"(KNN),":[173],"AdaBoost":[174],"(ADA),":[175],"logistic":[176],"regression":[177],"(LR),":[178],"decision":[180],"tree":[181],"(DT).":[182],"addition,":[184],"bag":[186],"words":[188],"(BoW)":[189],"term":[191],"frequency":[192,196],"inverse":[194],"document":[195],"(TF-IDF)":[197],"features":[198,205],"approaches":[200],"extract":[204],"tweets.":[208],"experiments":[210,268],"indicate":[211],"ADASYN":[215,249],"perform":[216],"much":[217],"better":[218],"than":[219],"thus":[222],"providing":[223],"higher":[224],"accuracy.":[225],"Additionally,":[226],"overall":[227],"results":[228],"show":[229],"SVM":[231,263],"\u2019linear\u2019":[233],"kernel":[234],"tends":[235],"attain":[237],"highest":[239],"accuracy":[240,254,272,283],"recall":[242],"score":[243],"99.67%":[245],"1.00%":[247],"on":[248,255],"oversampled":[250,257],"99.57%":[253],"dataset":[258],"TF-IDF":[260],"features.":[261],"10-fold":[266],"cross-validation":[267],"achieved":[269,280],"97.40":[270],"mean":[271],"0.008":[275],"standard":[276],"deviation.":[277],"Our":[278],"approach":[279],"2.62%":[281],"greater":[282],"compared":[285],"current":[288],"methods.":[289]},"counts_by_year":[{"year":2026,"cited_by_count":29},{"year":2025,"cited_by_count":101},{"year":2024,"cited_by_count":12}],"updated_date":"2026-05-21T06:26:12.895304","created_date":"2025-10-10T00:00:00"}
