{"id":"https://openalex.org/W4394837541","doi":"https://doi.org/10.3390/make6020039","title":"Impact of Nature of Medical Data on Machine and Deep Learning for Imbalanced Datasets: Clinical Validity of SMOTE Is Questionable","display_name":"Impact of Nature of Medical Data on Machine and Deep Learning for Imbalanced Datasets: Clinical Validity of SMOTE Is Questionable","publication_year":2024,"publication_date":"2024-04-15","ids":{"openalex":"https://openalex.org/W4394837541","doi":"https://doi.org/10.3390/make6020039"},"language":"en","primary_location":{"id":"doi:10.3390/make6020039","is_oa":true,"landing_page_url":"https://doi.org/10.3390/make6020039","pdf_url":"https://www.mdpi.com/2504-4990/6/2/39/pdf?version=1713176907","source":{"id":"https://openalex.org/S4210213891","display_name":"Machine Learning and Knowledge Extraction","issn_l":"2504-4990","issn":["2504-4990"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310310987","host_organization_name":"Multidisciplinary Digital Publishing Institute","host_organization_lineage":["https://openalex.org/P4310310987"],"host_organization_lineage_names":["Multidisciplinary Digital Publishing Institute"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Machine Learning and Knowledge Extraction","raw_type":"journal-article"},"type":"article","indexed_in":["crossref","doaj"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://www.mdpi.com/2504-4990/6/2/39/pdf?version=1713176907","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5056470663","display_name":"Seifollah Gholampour","orcid":"https://orcid.org/0000-0002-4924-4239"},"institutions":[{"id":"https://openalex.org/I40347166","display_name":"University of Chicago","ror":"https://ror.org/024mw5h28","country_code":"US","type":"education","lineage":["https://openalex.org/I40347166"]},{"id":"https://openalex.org/I4210157890","display_name":"Neurological Surgery","ror":"https://ror.org/04r0gp612","country_code":"US","type":"healthcare","lineage":["https://openalex.org/I4210157890"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Seifollah Gholampour","raw_affiliation_strings":["Department of Neurological Surgery, University of Chicago, Chicago, IL 60637, USA"],"raw_orcid":"https://orcid.org/0000-0002-4924-4239","affiliations":[{"raw_affiliation_string":"Department of Neurological Surgery, University of Chicago, Chicago, IL 60637, USA","institution_ids":["https://openalex.org/I40347166","https://openalex.org/I4210157890"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":1,"corresponding_author_ids":["https://openalex.org/A5056470663"],"corresponding_institution_ids":["https://openalex.org/I40347166","https://openalex.org/I4210157890"],"apc_list":{"value":1400,"currency":"CHF","value_usd":1515},"apc_paid":{"value":1400,"currency":"CHF","value_usd":1515},"fwci":10.4128,"has_fulltext":true,"cited_by_count":32,"citation_normalized_percentile":{"value":0.98568053,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":98,"max":100},"biblio":{"volume":"6","issue":"2","first_page":"827","last_page":"841"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11652","display_name":"Imbalanced Data Classification Techniques","score":0.9977999925613403,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11652","display_name":"Imbalanced Data Classification Techniques","score":0.9977999925613403,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11396","display_name":"Artificial Intelligence in Healthcare","score":0.9926999807357788,"subfield":{"id":"https://openalex.org/subfields/3605","display_name":"Health Information Management"},"field":{"id":"https://openalex.org/fields/36","display_name":"Health Professions"},"domain":{"id":"https://openalex.org/domains/4","display_name":"Health Sciences"}},{"id":"https://openalex.org/T13702","display_name":"Machine Learning in Healthcare","score":0.9855999946594238,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.7546855211257935},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.7198958396911621},{"id":"https://openalex.org/keywords/discriminative-model","display_name":"Discriminative model","score":0.6663712859153748},{"id":"https://openalex.org/keywords/oversampling","display_name":"Oversampling","score":0.6177504658699036},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.5840955972671509},{"id":"https://openalex.org/keywords/resampling","display_name":"Resampling","score":0.49807095527648926},{"id":"https://openalex.org/keywords/principal-component-analysis","display_name":"Principal component analysis","score":0.43584853410720825},{"id":"https://openalex.org/keywords/identification","display_name":"Identification (biology)","score":0.431879460811615},{"id":"https://openalex.org/keywords/class","display_name":"Class (philosophy)","score":0.4241781234741211},{"id":"https://openalex.org/keywords/sensitivity","display_name":"Sensitivity (control systems)","score":0.42366737127304077},{"id":"https://openalex.org/keywords/artificial-neural-network","display_name":"Artificial neural network","score":0.4190469980239868},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.37790629267692566},{"id":"https://openalex.org/keywords/engineering","display_name":"Engineering","score":0.10719290375709534}],"concepts":[{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.7546855211257935},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.7198958396911621},{"id":"https://openalex.org/C97931131","wikidata":"https://www.wikidata.org/wiki/Q5282087","display_name":"Discriminative model","level":2,"score":0.6663712859153748},{"id":"https://openalex.org/C197323446","wikidata":"https://www.wikidata.org/wiki/Q331222","display_name":"Oversampling","level":3,"score":0.6177504658699036},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.5840955972671509},{"id":"https://openalex.org/C150921843","wikidata":"https://www.wikidata.org/wiki/Q1170431","display_name":"Resampling","level":2,"score":0.49807095527648926},{"id":"https://openalex.org/C27438332","wikidata":"https://www.wikidata.org/wiki/Q2873","display_name":"Principal component analysis","level":2,"score":0.43584853410720825},{"id":"https://openalex.org/C116834253","wikidata":"https://www.wikidata.org/wiki/Q2039217","display_name":"Identification (biology)","level":2,"score":0.431879460811615},{"id":"https://openalex.org/C2777212361","wikidata":"https://www.wikidata.org/wiki/Q5127848","display_name":"Class (philosophy)","level":2,"score":0.4241781234741211},{"id":"https://openalex.org/C21200559","wikidata":"https://www.wikidata.org/wiki/Q7451068","display_name":"Sensitivity (control systems)","level":2,"score":0.42366737127304077},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.4190469980239868},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.37790629267692566},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.10719290375709534},{"id":"https://openalex.org/C24326235","wikidata":"https://www.wikidata.org/wiki/Q126095","display_name":"Electronic engineering","level":1,"score":0.0},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0},{"id":"https://openalex.org/C31258907","wikidata":"https://www.wikidata.org/wiki/Q1301371","display_name":"Computer network","level":1,"score":0.0},{"id":"https://openalex.org/C2776257435","wikidata":"https://www.wikidata.org/wiki/Q1576430","display_name":"Bandwidth (computing)","level":2,"score":0.0},{"id":"https://openalex.org/C59822182","wikidata":"https://www.wikidata.org/wiki/Q441","display_name":"Botany","level":1,"score":0.0}],"mesh":[],"locations_count":3,"locations":[{"id":"doi:10.3390/make6020039","is_oa":true,"landing_page_url":"https://doi.org/10.3390/make6020039","pdf_url":"https://www.mdpi.com/2504-4990/6/2/39/pdf?version=1713176907","source":{"id":"https://openalex.org/S4210213891","display_name":"Machine Learning and Knowledge Extraction","issn_l":"2504-4990","issn":["2504-4990"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310310987","host_organization_name":"Multidisciplinary Digital Publishing Institute","host_organization_lineage":["https://openalex.org/P4310310987"],"host_organization_lineage_names":["Multidisciplinary Digital Publishing Institute"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Machine Learning and Knowledge Extraction","raw_type":"journal-article"},{"id":"pmh:oai:uchicago.tind.io:11571","is_oa":true,"landing_page_url":"http://knowledge.uchicago.edu/record/11571","pdf_url":null,"source":{"id":"https://openalex.org/S4306402460","display_name":"Knowledge@UChicago (University of Chicago)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I40347166","host_organization_name":"University of Chicago","host_organization_lineage":["https://openalex.org/I40347166"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by-nc-nd","license_id":"https://openalex.org/licenses/cc-by-nc-nd","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"http://knowledge.uchicago.edu/record/11571","raw_type":"Text"},{"id":"pmh:oai:doaj.org/article:77345c495b5b4445b13a20d90b01c1a4","is_oa":false,"landing_page_url":"https://doaj.org/article/77345c495b5b4445b13a20d90b01c1a4","pdf_url":null,"source":{"id":"https://openalex.org/S4306401280","display_name":"DOAJ (DOAJ: Directory of Open Access Journals)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Machine Learning and Knowledge Extraction, Vol 6, Iss 2, Pp 827-841 (2024)","raw_type":"article"}],"best_oa_location":{"id":"doi:10.3390/make6020039","is_oa":true,"landing_page_url":"https://doi.org/10.3390/make6020039","pdf_url":"https://www.mdpi.com/2504-4990/6/2/39/pdf?version=1713176907","source":{"id":"https://openalex.org/S4210213891","display_name":"Machine Learning and Knowledge Extraction","issn_l":"2504-4990","issn":["2504-4990"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310310987","host_organization_name":"Multidisciplinary Digital Publishing Institute","host_organization_lineage":["https://openalex.org/P4310310987"],"host_organization_lineage_names":["Multidisciplinary Digital Publishing Institute"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Machine Learning and Knowledge Extraction","raw_type":"journal-article"},"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/10","display_name":"Reduced inequalities","score":0.7099999785423279}],"awards":[],"funders":[],"has_content":{"pdf":true,"grobid_xml":false},"content_urls":{"pdf":"https://content.openalex.org/works/W4394837541.pdf"},"referenced_works_count":67,"referenced_works":["https://openalex.org/W122356390","https://openalex.org/W1836346949","https://openalex.org/W1941659294","https://openalex.org/W1983391648","https://openalex.org/W2032867948","https://openalex.org/W2060015714","https://openalex.org/W2082344779","https://openalex.org/W2084853781","https://openalex.org/W2098254218","https://openalex.org/W2117443779","https://openalex.org/W2123458540","https://openalex.org/W2126734246","https://openalex.org/W2300921526","https://openalex.org/W2409438660","https://openalex.org/W2508098092","https://openalex.org/W2520599539","https://openalex.org/W2523654281","https://openalex.org/W2537719315","https://openalex.org/W2558658972","https://openalex.org/W2562319768","https://openalex.org/W2585605401","https://openalex.org/W2585770658","https://openalex.org/W2737589333","https://openalex.org/W2793544788","https://openalex.org/W2800788706","https://openalex.org/W2801609441","https://openalex.org/W2803174346","https://openalex.org/W2889489918","https://openalex.org/W2908465383","https://openalex.org/W2914361121","https://openalex.org/W2981817161","https://openalex.org/W3007335446","https://openalex.org/W3015088739","https://openalex.org/W3034143844","https://openalex.org/W3040126871","https://openalex.org/W3047101549","https://openalex.org/W3049455790","https://openalex.org/W3088053218","https://openalex.org/W3088242795","https://openalex.org/W3118754300","https://openalex.org/W3120655457","https://openalex.org/W3129693849","https://openalex.org/W3140323468","https://openalex.org/W3158264953","https://openalex.org/W3203004730","https://openalex.org/W3216794305","https://openalex.org/W4206330621","https://openalex.org/W4220836010","https://openalex.org/W4220874644","https://openalex.org/W4220916933","https://openalex.org/W4224037847","https://openalex.org/W4226084339","https://openalex.org/W4285212650","https://openalex.org/W4308627565","https://openalex.org/W4313421210","https://openalex.org/W4316021877","https://openalex.org/W4390608816","https://openalex.org/W4390692457","https://openalex.org/W4390796354","https://openalex.org/W4390805998","https://openalex.org/W4391092624","https://openalex.org/W4392906619","https://openalex.org/W4392923298","https://openalex.org/W6674940759","https://openalex.org/W6792092603","https://openalex.org/W6839944456","https://openalex.org/W6848527601"],"related_works":["https://openalex.org/W2766503024","https://openalex.org/W2781247653","https://openalex.org/W4206637278","https://openalex.org/W4386005305","https://openalex.org/W3082051559","https://openalex.org/W4386214543","https://openalex.org/W3022501507","https://openalex.org/W4205824301","https://openalex.org/W3188632291","https://openalex.org/W2023639956"],"abstract_inverted_index":{"Dataset":[0],"imbalances":[1],"pose":[2],"a":[3],"significant":[4],"challenge":[5],"to":[6,25,62,108,277,291],"predictive":[7],"modeling":[8,240],"in":[9,86,135,156,249,307],"both":[10],"medical":[11,42,68,88,139,250],"and":[12,20,34,54,69,94,124,146,187,204,233,242,275,298],"financial":[13,70,110],"domains,":[14],"where":[15],"conventional":[16],"strategies,":[17],"including":[18],"resampling":[19,255],"algorithmic":[21,243],"modifications,":[22],"often":[23],"fail":[24],"adequately":[26],"address":[27],"minority":[28,48,84,151,167],"class":[29,85,152],"underrepresentation.":[30],"This":[31,128,257],"study":[32,283],"theoretically":[33,269],"practically":[35],"investigates":[36],"how":[37],"the":[38,45,77,83,87,97,104,109,131,136,144,150,165,188,214,226,230,235,260,285,293,299],"inherent":[39,134],"nature":[40,137],"of":[41,47,76,82,138,149,164,190,217,229,262,287,303],"data":[43],"affects":[44],"classification":[46,81],"classes.":[49],"It":[50],"employs":[51],"ten":[52],"machine":[53],"deep":[55],"learning":[56],"classifiers,":[57],"ranging":[58],"from":[59],"ensemble":[60],"learners":[61],"cost-sensitive":[63],"algorithms,":[64],"across":[65],"comparably":[66],"sized":[67],"datasets.":[71],"Despite":[72],"these":[73],"efforts,":[74],"none":[75],"classifiers":[78,106],"achieved":[79],"effective":[80],"dataset,":[89],"with":[90,116],"sensitivity":[91,121],"below":[92,100],"5.0%":[93],"area":[95],"under":[96],"curve":[98],"(AUC)":[99],"57.0%.":[101],"In":[102],"contrast,":[103],"similar":[105],"applied":[107],"dataset":[111,219],"demonstrated":[112],"strong":[113],"discriminative":[114],"power,":[115],"overall":[117],"accuracy":[118],"exceeding":[119],"95.0%,":[120],"over":[122],"73.0%,":[123],"AUC":[125],"above":[126],"96.0%.":[127],"disparity":[129],"underscores":[130,259,284],"unpredictable":[132],"variability":[133],"data,":[140],"as":[141],"exemplified":[142],"by":[143],"dispersed":[145],"homogeneous":[147],"distribution":[148],"among":[153],"other":[154],"classes":[155],"principal":[157],"component":[158],"analysis":[159],"(PCA)":[160],"graphs.":[161],"The":[162],"application":[163],"synthetic":[166,173,218],"oversampling":[168],"technique":[169,232],"(SMOTE)":[170],"introduced":[171],"62":[172],"patients":[174],"based":[175],"on":[176,183,254],"merely":[177],"20":[178],"original":[179],"cases,":[180],"casting":[181],"doubt":[182],"its":[184],"clinical":[185,215,227,279,301],"validity":[186,228],"representation":[189],"real-world":[191,278],"patient":[192],"variability.":[193],"Furthermore,":[194],"post-SMOTE":[195],"feature":[196],"importance":[197,261,286],"analysis,":[198],"utilizing":[199],"SHapley":[200],"Additive":[201],"exPlanations":[202],"(SHAP)":[203],"tree-based":[205],"methods,":[206],"contradicted":[207],"established":[208],"cerebral":[209],"stroke":[210],"parameters,":[211],"further":[212],"questioning":[213],"coherence":[216],"augmentation.":[220],"These":[221],"findings":[222],"call":[223],"into":[224],"question":[225],"SMOTE":[231,306],"underscore":[234],"urgent":[236],"need":[237],"for":[238,245],"advanced":[239],"techniques":[241],"innovations":[244],"predicting":[246],"minority-class":[247],"outcomes":[248],"datasets":[251],"without":[252],"depending":[253],"strategies.":[256],"approach":[258],"developing":[263],"methods":[264],"that":[265],"are":[266],"not":[267],"only":[268],"robust":[270],"but":[271],"also":[272],"clinically":[273],"relevant":[274],"applicable":[276],"scenarios.":[280],"Consequently,":[281],"this":[282],"future":[288],"research":[289],"efforts":[290],"bridge":[292],"gap":[294],"between":[295],"theoretical":[296],"advancements":[297],"practical,":[300],"applications":[302],"models":[304],"like":[305],"healthcare.":[308]},"counts_by_year":[{"year":2026,"cited_by_count":8},{"year":2025,"cited_by_count":19},{"year":2024,"cited_by_count":5}],"updated_date":"2026-05-06T08:25:59.206177","created_date":"2025-10-10T00:00:00"}
