{"id":"https://openalex.org/W4416074225","doi":"https://doi.org/10.3390/fi17110513","title":"Machine Learning Pipeline for Early Diabetes Detection: A Comparative Study with Explainable AI","display_name":"Machine Learning Pipeline for Early Diabetes Detection: A Comparative Study with Explainable AI","publication_year":2025,"publication_date":"2025-11-10","ids":{"openalex":"https://openalex.org/W4416074225","doi":"https://doi.org/10.3390/fi17110513"},"language":"en","primary_location":{"id":"doi:10.3390/fi17110513","is_oa":true,"landing_page_url":"https://doi.org/10.3390/fi17110513","pdf_url":"https://www.mdpi.com/1999-5903/17/11/513/pdf","source":{"id":"https://openalex.org/S34838331","display_name":"Future Internet","issn_l":"1999-5903","issn":["1999-5903"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310310987","host_organization_name":"Multidisciplinary Digital Publishing Institute","host_organization_lineage":["https://openalex.org/P4310310987"],"host_organization_lineage_names":["Multidisciplinary Digital Publishing Institute"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Future Internet","raw_type":"journal-article"},"type":"article","indexed_in":["crossref","doaj"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://www.mdpi.com/1999-5903/17/11/513/pdf","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5088513024","display_name":"Yas Barzegar","orcid":"https://orcid.org/0000-0001-5826-4488"},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Yas Barzegar","raw_affiliation_strings":["Department of Management, Banking and Commodity Sciences, Sapienza University, 00161 Rome, Italy"],"affiliations":[{"raw_affiliation_string":"Department of Management, Banking and Commodity Sciences, Sapienza University, 00161 Rome, Italy","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101817780","display_name":"Atrin Barzegar","orcid":"https://orcid.org/0000-0002-4771-3935"},"institutions":[{"id":"https://openalex.org/I197809005","display_name":"University of Campania \"Luigi Vanvitelli\"","ror":"https://ror.org/02kqnpp86","country_code":"IT","type":"education","lineage":["https://openalex.org/I197809005"]},{"id":"https://openalex.org/I4210126337","display_name":"University of Campania \"Luigi Vanvitelli\"","ror":null,"country_code":"IT","type":null,"lineage":["https://openalex.org/I4210126337"]}],"countries":["IT"],"is_corresponding":false,"raw_author_name":"Atrin Barzegar","raw_affiliation_strings":["Mathematics, Physics and Applications to Engineering Department, Universit\u00e0 degli Studi della Campania \u201cLuigi Vanvitelli\u201d, Viale Lincoln n\u00b05, 81100 Caserta, Italy","Mathematics, Physics and Applications to Engineering Department, Universit\u00e0 Degli Studi Della Campania \u201cLuigi Vanvitelli\u201d, Viale Lincoln n\u00b05, 81100 Caserta, Italy"],"affiliations":[{"raw_affiliation_string":"Mathematics, Physics and Applications to Engineering Department, Universit\u00e0 degli Studi della Campania \u201cLuigi Vanvitelli\u201d, Viale Lincoln n\u00b05, 81100 Caserta, Italy","institution_ids":["https://openalex.org/I197809005"]},{"raw_affiliation_string":"Mathematics, Physics and Applications to Engineering Department, Universit\u00e0 Degli Studi Della Campania \u201cLuigi Vanvitelli\u201d, Viale Lincoln n\u00b05, 81100 Caserta, Italy","institution_ids":["https://openalex.org/I4210126337"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5011575591","display_name":"Francesco Bellini","orcid":"https://orcid.org/0000-0002-0609-8796"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Francesco Bellini","raw_affiliation_strings":["Department of Management, Banking and Commodity Sciences, Sapienza University, 00161 Rome, Italy"],"affiliations":[{"raw_affiliation_string":"Department of Management, Banking and Commodity Sciences, Sapienza University, 00161 Rome, Italy","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5047705242","display_name":"Fabrizio D\u2019Ascenzo","orcid":"https://orcid.org/0000-0001-7627-265X"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Fabrizio D'Ascenzo","raw_affiliation_strings":["Department of Management, Banking and Commodity Sciences, Sapienza University, 00161 Rome, Italy"],"affiliations":[{"raw_affiliation_string":"Department of Management, Banking and Commodity Sciences, Sapienza University, 00161 Rome, Italy","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5054599235","display_name":"Irina Gorelova","orcid":"https://orcid.org/0000-0002-1205-3658"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Irina Gorelova","raw_affiliation_strings":["Department of Management, Banking and Commodity Sciences, Sapienza University, 00161 Rome, Italy"],"affiliations":[{"raw_affiliation_string":"Department of Management, Banking and Commodity Sciences, Sapienza University, 00161 Rome, Italy","institution_ids":[]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5110606442","display_name":"P. Pisani","orcid":null},"institutions":[{"id":"https://openalex.org/I71353707","display_name":"Enel (Italy)","ror":"https://ror.org/052fdp564","country_code":"IT","type":"company","lineage":["https://openalex.org/I71353707"]}],"countries":["IT"],"is_corresponding":false,"raw_author_name":"Patrizio Pisani","raw_affiliation_strings":["Unidata S.p.A., Viale A. G. Eiffel, 00148 Rome, Italy","Unidata S.p.A., Viale A. G. Eiffel, 00148 Roma, Italy"],"affiliations":[{"raw_affiliation_string":"Unidata S.p.A., Viale A. G. Eiffel, 00148 Rome, Italy","institution_ids":["https://openalex.org/I71353707"]},{"raw_affiliation_string":"Unidata S.p.A., Viale A. G. Eiffel, 00148 Roma, Italy","institution_ids":["https://openalex.org/I71353707"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5088513024"],"corresponding_institution_ids":[],"apc_list":{"value":1400,"currency":"CHF","value_usd":1515},"apc_paid":{"value":1400,"currency":"CHF","value_usd":1515},"fwci":0.0,"has_fulltext":true,"cited_by_count":0,"citation_normalized_percentile":{"value":0.38610315,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":"17","issue":"11","first_page":"513","last_page":"513"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11396","display_name":"Artificial Intelligence in Healthcare","score":0.8148000240325928,"subfield":{"id":"https://openalex.org/subfields/3605","display_name":"Health Information Management"},"field":{"id":"https://openalex.org/fields/36","display_name":"Health Professions"},"domain":{"id":"https://openalex.org/domains/4","display_name":"Health Sciences"}},"topics":[{"id":"https://openalex.org/T11396","display_name":"Artificial Intelligence in Healthcare","score":0.8148000240325928,"subfield":{"id":"https://openalex.org/subfields/3605","display_name":"Health Information Management"},"field":{"id":"https://openalex.org/fields/36","display_name":"Health Professions"},"domain":{"id":"https://openalex.org/domains/4","display_name":"Health Sciences"}},{"id":"https://openalex.org/T13702","display_name":"Machine Learning in Healthcare","score":0.0494999997317791,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11652","display_name":"Imbalanced Data Classification Techniques","score":0.011900000274181366,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/pipeline","display_name":"Pipeline (software)","score":0.6266000270843506},{"id":"https://openalex.org/keywords/generalizability-theory","display_name":"Generalizability theory","score":0.6141999959945679},{"id":"https://openalex.org/keywords/support-vector-machine","display_name":"Support vector machine","score":0.5091000199317932},{"id":"https://openalex.org/keywords/preprocessor","display_name":"Preprocessor","score":0.5087000131607056},{"id":"https://openalex.org/keywords/logistic-regression","display_name":"Logistic regression","score":0.4805000126361847},{"id":"https://openalex.org/keywords/overfitting","display_name":"Overfitting","score":0.47679999470710754},{"id":"https://openalex.org/keywords/feature-engineering","display_name":"Feature engineering","score":0.41690000891685486},{"id":"https://openalex.org/keywords/ensemble-learning","display_name":"Ensemble learning","score":0.3797999918460846}],"concepts":[{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.8062999844551086},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8058000206947327},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.777999997138977},{"id":"https://openalex.org/C43521106","wikidata":"https://www.wikidata.org/wiki/Q2165493","display_name":"Pipeline (software)","level":2,"score":0.6266000270843506},{"id":"https://openalex.org/C27158222","wikidata":"https://www.wikidata.org/wiki/Q5532422","display_name":"Generalizability theory","level":2,"score":0.6141999959945679},{"id":"https://openalex.org/C12267149","wikidata":"https://www.wikidata.org/wiki/Q282453","display_name":"Support vector machine","level":2,"score":0.5091000199317932},{"id":"https://openalex.org/C34736171","wikidata":"https://www.wikidata.org/wiki/Q918333","display_name":"Preprocessor","level":2,"score":0.5087000131607056},{"id":"https://openalex.org/C151956035","wikidata":"https://www.wikidata.org/wiki/Q1132755","display_name":"Logistic regression","level":2,"score":0.4805000126361847},{"id":"https://openalex.org/C22019652","wikidata":"https://www.wikidata.org/wiki/Q331309","display_name":"Overfitting","level":3,"score":0.47679999470710754},{"id":"https://openalex.org/C2778827112","wikidata":"https://www.wikidata.org/wiki/Q22245680","display_name":"Feature engineering","level":3,"score":0.41690000891685486},{"id":"https://openalex.org/C45942800","wikidata":"https://www.wikidata.org/wiki/Q245652","display_name":"Ensemble learning","level":2,"score":0.3797999918460846},{"id":"https://openalex.org/C160735492","wikidata":"https://www.wikidata.org/wiki/Q31207","display_name":"Health care","level":2,"score":0.3747999966144562},{"id":"https://openalex.org/C2776401178","wikidata":"https://www.wikidata.org/wiki/Q12050496","display_name":"Feature (linguistics)","level":2,"score":0.36090001463890076},{"id":"https://openalex.org/C136389625","wikidata":"https://www.wikidata.org/wiki/Q334384","display_name":"Supervised learning","level":3,"score":0.35920000076293945},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.30140000581741333},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.29420000314712524},{"id":"https://openalex.org/C10551718","wikidata":"https://www.wikidata.org/wiki/Q5227332","display_name":"Data pre-processing","level":2,"score":0.2856999933719635},{"id":"https://openalex.org/C45804977","wikidata":"https://www.wikidata.org/wiki/Q7239673","display_name":"Predictive modelling","level":2,"score":0.2815999984741211},{"id":"https://openalex.org/C84525736","wikidata":"https://www.wikidata.org/wiki/Q831366","display_name":"Decision tree","level":2,"score":0.2712000012397766},{"id":"https://openalex.org/C108583219","wikidata":"https://www.wikidata.org/wiki/Q197536","display_name":"Deep learning","level":2,"score":0.26969999074935913},{"id":"https://openalex.org/C110083411","wikidata":"https://www.wikidata.org/wiki/Q1744628","display_name":"Statistical classification","level":2,"score":0.2639000117778778},{"id":"https://openalex.org/C2777212361","wikidata":"https://www.wikidata.org/wiki/Q5127848","display_name":"Class (philosophy)","level":2,"score":0.26190000772476196},{"id":"https://openalex.org/C63527458","wikidata":"https://www.wikidata.org/wiki/Q5133829","display_name":"Clinical decision support system","level":3,"score":0.25769999623298645}],"mesh":[],"locations_count":3,"locations":[{"id":"doi:10.3390/fi17110513","is_oa":true,"landing_page_url":"https://doi.org/10.3390/fi17110513","pdf_url":"https://www.mdpi.com/1999-5903/17/11/513/pdf","source":{"id":"https://openalex.org/S34838331","display_name":"Future Internet","issn_l":"1999-5903","issn":["1999-5903"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310310987","host_organization_name":"Multidisciplinary Digital Publishing Institute","host_organization_lineage":["https://openalex.org/P4310310987"],"host_organization_lineage_names":["Multidisciplinary Digital Publishing Institute"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Future Internet","raw_type":"journal-article"},{"id":"pmh:oai:doaj.org/article:d5f0023ad40043429d9c67f6fd57d1d3","is_oa":true,"landing_page_url":"https://doaj.org/article/d5f0023ad40043429d9c67f6fd57d1d3","pdf_url":null,"source":{"id":"https://openalex.org/S112646816","display_name":"SHILAP Revista de lepidopterolog\u00eda","issn_l":"0300-5267","issn":["0300-5267","2340-4078"],"is_oa":true,"is_in_doaj":true,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Future Internet, Vol 17, Iss 11, p 513 (2025)","raw_type":"article"},{"id":"pmh:oai:iris.uniroma1.it:11573/1755154","is_oa":false,"landing_page_url":"https://hdl.handle.net/11573/1755154","pdf_url":null,"source":{"id":"https://openalex.org/S4377196107","display_name":"IRIS Research product catalog (Sapienza University of Rome)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"info:eu-repo/semantics/article"}],"best_oa_location":{"id":"doi:10.3390/fi17110513","is_oa":true,"landing_page_url":"https://doi.org/10.3390/fi17110513","pdf_url":"https://www.mdpi.com/1999-5903/17/11/513/pdf","source":{"id":"https://openalex.org/S34838331","display_name":"Future Internet","issn_l":"1999-5903","issn":["1999-5903"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310310987","host_organization_name":"Multidisciplinary Digital Publishing Institute","host_organization_lineage":["https://openalex.org/P4310310987"],"host_organization_lineage_names":["Multidisciplinary Digital Publishing Institute"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Future Internet","raw_type":"journal-article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":true,"grobid_xml":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4416074225.pdf","grobid_xml":"https://content.openalex.org/works/W4416074225.grobid-xml"},"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"The":[0,177],"use":[1],"of":[2,56,85,166,170,175],"Artificial":[3],"Intelligence":[4],"(AI)":[5],"in":[6,209],"healthcare":[7,220],"has":[8],"significantly":[9],"advanced":[10,42],"early":[11],"disease":[12],"detection,":[13],"enabling":[14],"timely":[15],"diagnosis":[16],"and":[17,58,78,96,101,112,132,157,172,188,205],"improved":[18],"patient":[19],"outcomes.":[20],"This":[21,195],"work":[22],"proposes":[23],"an":[24,164,168,173],"end-to-end":[25],"machine":[26],"learning":[27],"(ML)":[28],"model":[29,178],"for":[30,82,109,202,216],"predicting":[31],"diabetes":[32,86],"based":[33],"on":[34,91,182],"data":[35],"quality":[36],"by":[37,44,130],"following":[38],"key":[39],"steps,":[40],"including":[41],"preprocessing":[43],"KNN":[45],"imputation,":[46],"intelligent":[47],"feature":[48],"selection,":[49],"class":[50],"imbalance":[51],"with":[52,141],"a":[53,189,199,213],"hybrid":[54],"approach":[55],"SMOTEENN,":[57],"multi-model":[59],"classification.":[60],"We":[61,88,103],"rigorously":[62],"compared":[63],"nine":[64],"ML":[65,207],"classifiers,":[66],"namely":[67],"ensemble":[68,148],"approaches":[69],"(Random":[70],"Forest,":[71],"CatBoost,":[72],"XGBoost),":[73],"Support":[74],"Vector":[75],"Machines":[76],"(SVM),":[77],"Logistic":[79],"Regression":[80],"(LR)":[81],"the":[83,114,126,151,155,160],"prediction":[84],"disease.":[87],"evaluated":[89],"performance":[90,153],"specificity,":[92],"accuracy,":[93],"recall,":[94],"precision,":[95],"F1-score":[97,174],"to":[98],"assess":[99],"generalizability":[100],"robustness.":[102],"employed":[104],"SHapley":[105],"Additive":[106],"exPlanations":[107],"(SHAP)":[108],"explainability,":[110],"ranking,":[111],"identifying":[113],"most":[115],"influential":[116],"clinical":[117,210],"risk":[118,137],"factors.":[119],"SHAP":[120],"analysis":[121],"identified":[122],"glucose":[123],"levels":[124],"as":[125,212],"dominant":[127],"predictor,":[128],"followed":[129],"BMI":[131],"age,":[133],"providing":[134],"clinically":[135],"interpretable":[136,206],"factors":[138],"that":[139,147],"align":[140],"established":[142],"medical":[143],"knowledge.":[144],"Results":[145],"indicate":[146],"models":[149,208],"have":[150],"highest":[152],"among":[154],"others,":[156],"CatBoost":[158],"performed":[159],"best,":[161],"which":[162],"achieved":[163],"ROC-AUC":[165],"0.972,":[167],"accuracy":[169],"0.968,":[171],"0.971.":[176],"was":[179],"successfully":[180],"validated":[181],"two":[183],"larger":[184],"datasets":[185],"(CDC":[186],"BRFSS":[187],"130-hospital":[190],"dataset),":[191],"confirming":[192],"its":[193],"generalizability.":[194],"data-driven":[196],"design":[197],"provides":[198],"reproducible":[200],"platform":[201],"applying":[203],"useful":[204],"practice":[211],"primary":[214],"application":[215],"future":[217],"Internet-of-Things-based":[218],"smart":[219],"systems.":[221]},"counts_by_year":[],"updated_date":"2026-03-27T05:58:40.876381","created_date":"2025-11-10T00:00:00"}
