{"id":"https://openalex.org/W4391761597","doi":"https://doi.org/10.1145/3643643","title":"Do We Really Need Imputation in AutoML Predictive Modeling?","display_name":"Do We Really Need Imputation in AutoML Predictive Modeling?","publication_year":2024,"publication_date":"2024-02-16","ids":{"openalex":"https://openalex.org/W4391761597","doi":"https://doi.org/10.1145/3643643"},"language":"en","primary_location":{"id":"doi:10.1145/3643643","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3643643","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3643643","source":{"id":"https://openalex.org/S41523882","display_name":"ACM Transactions on Knowledge Discovery from Data","issn_l":"1556-4681","issn":["1556-4681","1556-472X"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM Transactions on Knowledge Discovery from Data","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"hybrid","oa_url":"https://dl.acm.org/doi/pdf/10.1145/3643643","any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5102045630","display_name":"George Paterakis","orcid":"https://orcid.org/0009-0005-8856-8809"},"institutions":[{"id":"https://openalex.org/I142617266","display_name":"University of Crete","ror":"https://ror.org/00dr28g20","country_code":"GR","type":"education","lineage":["https://openalex.org/I142617266"]}],"countries":["GR"],"is_corresponding":true,"raw_author_name":"George Paterakis","raw_affiliation_strings":["Computer Science Department, University of Crete, Heraklion, Greece"],"affiliations":[{"raw_affiliation_string":"Computer Science Department, University of Crete, Heraklion, Greece","institution_ids":["https://openalex.org/I142617266"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5091317391","display_name":"Stefanos Fafalios","orcid":"https://orcid.org/0009-0007-6722-0373"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Stefanos Fafalios","raw_affiliation_strings":["JADBio Gnosis DA S.A, Heraklion, Greece"],"affiliations":[{"raw_affiliation_string":"JADBio Gnosis DA S.A, Heraklion, Greece","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5028222654","display_name":"Paulos Charonyktakis","orcid":"https://orcid.org/0000-0002-6899-4262"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Paulos Charonyktakis","raw_affiliation_strings":["JADBio Gnosis DA S.A., Heraklion, Greece"],"affiliations":[{"raw_affiliation_string":"JADBio Gnosis DA S.A., Heraklion, Greece","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5042003138","display_name":"Vassilis Christophides","orcid":"https://orcid.org/0000-0002-2076-1881"},"institutions":[{"id":"https://openalex.org/I86175216","display_name":"\u00c9cole Nationale Sup\u00e9rieure de l'\u00c9lectronique et de ses Applications","ror":"https://ror.org/03qeacd72","country_code":"FR","type":"education","lineage":["https://openalex.org/I86175216"]}],"countries":["FR"],"is_corresponding":false,"raw_author_name":"Vassilis Christophides","raw_affiliation_strings":["ENSEA, ETIS, Cergy, France"],"affiliations":[{"raw_affiliation_string":"ENSEA, ETIS, Cergy, France","institution_ids":["https://openalex.org/I86175216"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5070582381","display_name":"Ioannis Tsamardinos","orcid":"https://orcid.org/0000-0002-2492-959X"},"institutions":[{"id":"https://openalex.org/I142617266","display_name":"University of Crete","ror":"https://ror.org/00dr28g20","country_code":"GR","type":"education","lineage":["https://openalex.org/I142617266"]}],"countries":["GR"],"is_corresponding":false,"raw_author_name":"Ioannis Tsamardinos","raw_affiliation_strings":["Computer Science Department, University of Crete, Heraklion, Greece"],"affiliations":[{"raw_affiliation_string":"Computer Science Department, University of Crete, Heraklion, Greece","institution_ids":["https://openalex.org/I142617266"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5102045630"],"corresponding_institution_ids":["https://openalex.org/I142617266"],"apc_list":null,"apc_paid":null,"fwci":1.3901,"has_fulltext":true,"cited_by_count":4,"citation_normalized_percentile":{"value":0.82810104,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":97},"biblio":{"volume":"18","issue":"6","first_page":"1","last_page":"64"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12535","display_name":"Machine Learning and Data Classification","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T12535","display_name":"Machine Learning and Data Classification","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12072","display_name":"Machine Learning and Algorithms","score":0.993399977684021,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11652","display_name":"Imbalanced Data Classification Techniques","score":0.9858999848365784,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/imputation","display_name":"Imputation (statistics)","score":0.8631058931350708},{"id":"https://openalex.org/keywords/missing-data","display_name":"Missing data","score":0.8619213700294495},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.654749870300293},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5840277075767517},{"id":"https://openalex.org/keywords/artificial-neural-network","display_name":"Artificial neural network","score":0.558483362197876},{"id":"https://openalex.org/keywords/feature-selection","display_name":"Feature selection","score":0.5570672750473022},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.5249959230422974},{"id":"https://openalex.org/keywords/binary-classification","display_name":"Binary classification","score":0.5154524445533752},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.4964504837989807},{"id":"https://openalex.org/keywords/autoencoder","display_name":"Autoencoder","score":0.4777989983558655},{"id":"https://openalex.org/keywords/binary-number","display_name":"Binary number","score":0.43753761053085327},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.328622430562973},{"id":"https://openalex.org/keywords/support-vector-machine","display_name":"Support vector machine","score":0.19782254099845886},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.16627180576324463}],"concepts":[{"id":"https://openalex.org/C58041806","wikidata":"https://www.wikidata.org/wiki/Q1660484","display_name":"Imputation (statistics)","level":3,"score":0.8631058931350708},{"id":"https://openalex.org/C9357733","wikidata":"https://www.wikidata.org/wiki/Q6878417","display_name":"Missing data","level":2,"score":0.8619213700294495},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.654749870300293},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5840277075767517},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.558483362197876},{"id":"https://openalex.org/C148483581","wikidata":"https://www.wikidata.org/wiki/Q446488","display_name":"Feature selection","level":2,"score":0.5570672750473022},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.5249959230422974},{"id":"https://openalex.org/C66905080","wikidata":"https://www.wikidata.org/wiki/Q17005494","display_name":"Binary classification","level":3,"score":0.5154524445533752},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.4964504837989807},{"id":"https://openalex.org/C101738243","wikidata":"https://www.wikidata.org/wiki/Q786435","display_name":"Autoencoder","level":3,"score":0.4777989983558655},{"id":"https://openalex.org/C48372109","wikidata":"https://www.wikidata.org/wiki/Q3913","display_name":"Binary number","level":2,"score":0.43753761053085327},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.328622430562973},{"id":"https://openalex.org/C12267149","wikidata":"https://www.wikidata.org/wiki/Q282453","display_name":"Support vector machine","level":2,"score":0.19782254099845886},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.16627180576324463},{"id":"https://openalex.org/C94375191","wikidata":"https://www.wikidata.org/wiki/Q11205","display_name":"Arithmetic","level":1,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1145/3643643","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3643643","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3643643","source":{"id":"https://openalex.org/S41523882","display_name":"ACM Transactions on Knowledge Discovery from Data","issn_l":"1556-4681","issn":["1556-4681","1556-472X"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM Transactions on Knowledge Discovery from Data","raw_type":"journal-article"},{"id":"pmh:oai:HAL:hal-04452761v1","is_oa":false,"landing_page_url":"https://hal.science/hal-04452761","pdf_url":null,"source":{"id":"https://openalex.org/S4406922466","display_name":"SPIRE - Sciences Po Institutional REpository","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"ACM Transactions on Knowledge Discovery from Data (TKDD), In press","raw_type":"Journal articles"}],"best_oa_location":{"id":"doi:10.1145/3643643","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3643643","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3643643","source":{"id":"https://openalex.org/S41523882","display_name":"ACM Transactions on Knowledge Discovery from Data","issn_l":"1556-4681","issn":["1556-4681","1556-472X"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM Transactions on Knowledge Discovery from Data","raw_type":"journal-article"},"sustainable_development_goals":[],"awards":[],"funders":[{"id":"https://openalex.org/F4320327859","display_name":"Hellenic Foundation for Research and Innovation","ror":null}],"has_content":{"pdf":true,"grobid_xml":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4391761597.pdf","grobid_xml":"https://content.openalex.org/works/W4391761597.grobid-xml"},"referenced_works_count":66,"referenced_works":["https://openalex.org/W1238965463","https://openalex.org/W1558866804","https://openalex.org/W1822348759","https://openalex.org/W1976364950","https://openalex.org/W1979646154","https://openalex.org/W1981276685","https://openalex.org/W2004144464","https://openalex.org/W2064186732","https://openalex.org/W2065974896","https://openalex.org/W2096863518","https://openalex.org/W2100358124","https://openalex.org/W2102539288","https://openalex.org/W2110065044","https://openalex.org/W2115098571","https://openalex.org/W2125027820","https://openalex.org/W2130347324","https://openalex.org/W2132862423","https://openalex.org/W2135046866","https://openalex.org/W2141019052","https://openalex.org/W2146130798","https://openalex.org/W2162313689","https://openalex.org/W2163150789","https://openalex.org/W2167546040","https://openalex.org/W2167942713","https://openalex.org/W2317439525","https://openalex.org/W2480680997","https://openalex.org/W2501909709","https://openalex.org/W2534722177","https://openalex.org/W2542806636","https://openalex.org/W2591700809","https://openalex.org/W2788592841","https://openalex.org/W2803403013","https://openalex.org/W2808622828","https://openalex.org/W2897574832","https://openalex.org/W2904561288","https://openalex.org/W2955219525","https://openalex.org/W2955443275","https://openalex.org/W2963227653","https://openalex.org/W2990138404","https://openalex.org/W2991326283","https://openalex.org/W2997591727","https://openalex.org/W2997919412","https://openalex.org/W3003365835","https://openalex.org/W3007665943","https://openalex.org/W3017317785","https://openalex.org/W3035965352","https://openalex.org/W3044965819","https://openalex.org/W3046991059","https://openalex.org/W3099878876","https://openalex.org/W3113310528","https://openalex.org/W3124308904","https://openalex.org/W3125213913","https://openalex.org/W3130968222","https://openalex.org/W3157791054","https://openalex.org/W3170657538","https://openalex.org/W3179164182","https://openalex.org/W3197003309","https://openalex.org/W3217140273","https://openalex.org/W4205556549","https://openalex.org/W4223923854","https://openalex.org/W4226239514","https://openalex.org/W4280494175","https://openalex.org/W4281294571","https://openalex.org/W4283020005","https://openalex.org/W4312125939","https://openalex.org/W6772013027"],"related_works":["https://openalex.org/W2181530120","https://openalex.org/W4211215373","https://openalex.org/W2024529227","https://openalex.org/W2055961818","https://openalex.org/W2903115227","https://openalex.org/W1574575415","https://openalex.org/W3144172081","https://openalex.org/W3179858851","https://openalex.org/W2081476516","https://openalex.org/W2581984549"],"abstract_inverted_index":{"Numerous":[0],"real-world":[1,136,183],"data":[2],"contain":[3],"missing":[4,32,140,151,162],"values,":[5],"while":[6],"in":[7,30,37,42,122,148,188],"contrast,":[8],"most":[9],"Machine":[10,45],"Learning":[11,46],"(ML)":[12],"algorithms":[13,22,40,61],"assume":[14],"complete":[15,146],"datasets.":[16],"For":[17],"this":[18,83,221],"reason,":[19],"several":[20],"imputation":[21,60,93,128,216],"have":[23],"been":[24],"proposed":[25],"to":[26,56,156,234],"predict":[27],"and":[28,107,110,142,185,228],"fill":[29],"the":[31,35,126,173,179,186],"values.":[33],"Given":[34],"advances":[36],"predictive":[38,99,205,219],"modeling":[39,100],"tuned":[41],"an":[43,97],"Automated":[44],"context":[47],"(AutoML)":[48],"setting,":[49],"a":[50,73,103,115,224],"question":[51],"that":[52,172],"naturally":[53],"arises":[54],"is":[55,171,178],"what":[57],"extent":[58],"sophisticated":[59],"(e.g.,":[62],"Neural":[63],"Network":[64],"based)":[65],"are":[66,153,212],"really":[67],"needed,":[68],"or":[69],"we":[70,85,124],"can":[71],"obtain":[72],"descent":[74],"performance":[75],"using":[76],"simple":[77],"methods":[78],"like":[79],"Mean/Mode":[80],"(MM).":[81],"In":[82,195],"article,":[84],"experimentally":[86],"compare":[87],"six":[88],"state-of-the-art":[89],"representatives":[90],"of":[91],"different":[92,157],"algorithmic":[94],"families":[95],"from":[96,168],"AutoML":[98,117],"perspective,":[101],"including":[102],"feature":[104,232],"selection":[105],"step":[106],"combined":[108],"algorithm":[109],"hyper-parameter":[111],"selection.":[112],"We":[113],"used":[114],"commercial":[116],"tool":[118],"for":[119],"our":[120,169],"experiments,":[121],"which":[123,149],"included":[125],"selected":[127],"methods.":[129],"Experiments":[130],"ran":[131],"on":[132,176,182,207],"25":[133],"binary":[134,144,197],"classification":[135,145],"incomplete":[137],"datasets":[138,147,184],"with":[139],"values":[141,152,233],"10":[143],"synthetic":[150],"introduced":[154],"according":[155],"missingness":[158,201],"mechanisms,":[159],"at":[160,223],"varying":[161],"frequencies.":[163],"The":[164],"main":[165],"conclusion":[166],"drawn":[167],"experiments":[170],"best":[174],"method":[175],"average":[177],"Denoise":[180],"AutoEncoder":[181],"MissForest":[187],"simulated":[189],"datasets,":[190],"followed":[191],"closely":[192],"by":[193],"MM.":[194],"addition,":[196],"indicator":[198],"variables":[199],"encoding":[200],"patterns":[202],"actually":[203],"improve":[204],"performance,":[206,220],"average.":[208],"Last,":[209],"although":[210],"there":[211],"cases":[213],"where":[214],"Neural-Network-based":[215],"significantly":[217],"improves":[218],"comes":[222],"great":[225],"computational":[226],"cost":[227],"requires":[229],"measuring":[230],"all":[231],"impute":[235],"new":[236],"samples.":[237]},"counts_by_year":[{"year":2025,"cited_by_count":3},{"year":2024,"cited_by_count":1}],"updated_date":"2026-03-27T05:58:40.876381","created_date":"2025-10-10T00:00:00"}
