{"id":"https://openalex.org/W53754145","doi":"https://doi.org/10.3233/978-1-60750-939-4-269","title":"A preprocessing method for improving data mining techniques. Application to a large medical diabetes database","display_name":"A preprocessing method for improving data mining techniques. Application to a large medical diabetes database","publication_year":2003,"publication_date":"2003-01-01","ids":{"openalex":"https://openalex.org/W53754145","doi":"https://doi.org/10.3233/978-1-60750-939-4-269","mag":"53754145","pmid":"https://pubmed.ncbi.nlm.nih.gov/14663998"},"language":"en","primary_location":{"id":"doi:10.3233/978-1-60750-939-4-269","is_oa":false,"landing_page_url":"https://doi.org/10.3233/978-1-60750-939-4-269","pdf_url":null,"source":{"id":"https://openalex.org/S4210179765","display_name":"Studies in health technology and informatics","issn_l":"0926-9630","issn":["0926-9630","1879-8365"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310318577","host_organization_name":"IOS Press","host_organization_lineage":["https://openalex.org/P4310318577"],"host_organization_lineage_names":["IOS Press"],"type":"book series"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Studies in Health Technology and Informatics","raw_type":"book-chapter"},"type":"article","indexed_in":["crossref","pubmed"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5005205127","display_name":"Alain Duhamel","orcid":"https://orcid.org/0000-0003-0392-7701"},"institutions":[{"id":"https://openalex.org/I181827316","display_name":"Universit\u00e9 Catholique de Lille","ror":"https://ror.org/025s1b152","country_code":"FR","type":"education","lineage":["https://openalex.org/I181827316"]}],"countries":["FR"],"is_corresponding":true,"raw_author_name":"Duhamel A","raw_affiliation_strings":["CERIM-Facult\u00e9 de M\u00e9decine-1, Place de Verdun-59045 Lille, France"],"affiliations":[{"raw_affiliation_string":"CERIM-Facult\u00e9 de M\u00e9decine-1, Place de Verdun-59045 Lille, France","institution_ids":["https://openalex.org/I181827316"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5046749082","display_name":"Nuttens Mc","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Nuttens Mc","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5051655575","display_name":"Patrick Devos","orcid":"https://orcid.org/0000-0001-7803-9552"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Devos P","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5065620655","display_name":"M Picavet","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Picavet M","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5070528092","display_name":"R. Beuscart","orcid":"https://orcid.org/0000-0001-7218-0643"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Beuscart R","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5005205127"],"corresponding_institution_ids":["https://openalex.org/I181827316"],"apc_list":null,"apc_paid":null,"fwci":2.0772,"has_fulltext":false,"cited_by_count":30,"citation_normalized_percentile":{"value":0.85545194,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":97},"biblio":{"volume":"95","issue":null,"first_page":"269","last_page":"74"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10538","display_name":"Data Mining Algorithms and Applications","score":0.9939000010490417,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10538","display_name":"Data Mining Algorithms and Applications","score":0.9939000010490417,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11396","display_name":"Artificial Intelligence in Healthcare","score":0.9659000039100647,"subfield":{"id":"https://openalex.org/subfields/3605","display_name":"Health Information Management"},"field":{"id":"https://openalex.org/fields/36","display_name":"Health Professions"},"domain":{"id":"https://openalex.org/domains/4","display_name":"Health Sciences"}},{"id":"https://openalex.org/T11303","display_name":"Bayesian Modeling and Causal Inference","score":0.9052000045776367,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/missing-data","display_name":"Missing data","score":0.9127658009529114},{"id":"https://openalex.org/keywords/imputation","display_name":"Imputation (statistics)","score":0.7642934322357178},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.7311877608299255},{"id":"https://openalex.org/keywords/data-pre-processing","display_name":"Data pre-processing","score":0.6749608516693115},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6638622283935547},{"id":"https://openalex.org/keywords/preprocessor","display_name":"Preprocessor","score":0.5914018154144287},{"id":"https://openalex.org/keywords/decision-tree","display_name":"Decision tree","score":0.46155381202697754},{"id":"https://openalex.org/keywords/multivariate-statistics","display_name":"Multivariate statistics","score":0.4507449269294739},{"id":"https://openalex.org/keywords/knowledge-extraction","display_name":"Knowledge extraction","score":0.4342513680458069},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.18148523569107056},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.1570378541946411}],"concepts":[{"id":"https://openalex.org/C9357733","wikidata":"https://www.wikidata.org/wiki/Q6878417","display_name":"Missing data","level":2,"score":0.9127658009529114},{"id":"https://openalex.org/C58041806","wikidata":"https://www.wikidata.org/wiki/Q1660484","display_name":"Imputation (statistics)","level":3,"score":0.7642934322357178},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.7311877608299255},{"id":"https://openalex.org/C10551718","wikidata":"https://www.wikidata.org/wiki/Q5227332","display_name":"Data pre-processing","level":2,"score":0.6749608516693115},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6638622283935547},{"id":"https://openalex.org/C34736171","wikidata":"https://www.wikidata.org/wiki/Q918333","display_name":"Preprocessor","level":2,"score":0.5914018154144287},{"id":"https://openalex.org/C84525736","wikidata":"https://www.wikidata.org/wiki/Q831366","display_name":"Decision tree","level":2,"score":0.46155381202697754},{"id":"https://openalex.org/C161584116","wikidata":"https://www.wikidata.org/wiki/Q1952580","display_name":"Multivariate statistics","level":2,"score":0.4507449269294739},{"id":"https://openalex.org/C120567893","wikidata":"https://www.wikidata.org/wiki/Q1582085","display_name":"Knowledge extraction","level":2,"score":0.4342513680458069},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.18148523569107056},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.1570378541946411}],"mesh":[{"descriptor_ui":"D003627","descriptor_name":"Data Interpretation, Statistical","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D003627","descriptor_name":"Data Interpretation, Statistical","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D003627","descriptor_name":"Data Interpretation, Statistical","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D003920","descriptor_name":"Diabetes Mellitus","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D003920","descriptor_name":"Diabetes Mellitus","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D003920","descriptor_name":"Diabetes Mellitus","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D005602","descriptor_name":"France","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D005602","descriptor_name":"France","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D005602","descriptor_name":"France","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D006801","descriptor_name":"Humans","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D006801","descriptor_name":"Humans","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D006801","descriptor_name":"Humans","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D008492","descriptor_name":"Medical Informatics Computing","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D008492","descriptor_name":"Medical Informatics Computing","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D008492","descriptor_name":"Medical Informatics Computing","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D016247","descriptor_name":"Information Storage and Retrieval","qualifier_ui":"Q000592","qualifier_name":"standards","is_major_topic":false},{"descriptor_ui":"D016247","descriptor_name":"Information Storage and Retrieval","qualifier_ui":"Q000592","qualifier_name":"standards","is_major_topic":false},{"descriptor_ui":"D016247","descriptor_name":"Information Storage and Retrieval","qualifier_ui":"Q000592","qualifier_name":"standards","is_major_topic":false},{"descriptor_ui":"D016571","descriptor_name":"Neural Networks, Computer","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D016571","descriptor_name":"Neural Networks, Computer","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D016571","descriptor_name":"Neural Networks, Computer","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false}],"locations_count":2,"locations":[{"id":"doi:10.3233/978-1-60750-939-4-269","is_oa":false,"landing_page_url":"https://doi.org/10.3233/978-1-60750-939-4-269","pdf_url":null,"source":{"id":"https://openalex.org/S4210179765","display_name":"Studies in health technology and informatics","issn_l":"0926-9630","issn":["0926-9630","1879-8365"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310318577","host_organization_name":"IOS Press","host_organization_lineage":["https://openalex.org/P4310318577"],"host_organization_lineage_names":["IOS Press"],"type":"book series"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Studies in Health Technology and Informatics","raw_type":"book-chapter"},{"id":"pmid:14663998","is_oa":false,"landing_page_url":"https://pubmed.ncbi.nlm.nih.gov/14663998","pdf_url":null,"source":{"id":"https://openalex.org/S4306525036","display_name":"PubMed","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1299303238","host_organization_name":"National Institutes of Health","host_organization_lineage":["https://openalex.org/I1299303238"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Studies in health technology and informatics","raw_type":null}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W2181530120","https://openalex.org/W4211215373","https://openalex.org/W2024529227","https://openalex.org/W2055961818","https://openalex.org/W2903115227","https://openalex.org/W1574575415","https://openalex.org/W3144172081","https://openalex.org/W3179858851","https://openalex.org/W2081476516","https://openalex.org/W2581984549"],"abstract_inverted_index":{"The":[0,56,101,126],"Knowledge":[1],"Discovery":[2],"in":[3],"Databases":[4],"(KDD)":[5],"methodology":[6],"seems":[7],"to":[8,63,70,113,120,139,180,195],"be":[9,209],"attractive":[10],"on":[11,107,117,162,183],"the":[12,19,22,37,40,52,59,81,94,122,148,188,205],"analyze":[13,64],"of":[14,29,39,51,58,90,93,110,133,176,190,198,218],"large":[15,164],"clinical":[16],"databases.":[17],"In":[18],"KDD":[20],"process,":[21],"preprocessing":[23],"step":[24,66],"(data":[25],"cleaning":[26,103],"and":[27,47,67,74,116,153,223],"handling":[28,98],"missing":[30,75,91,99,142,172,219],"values)":[31],"is":[32],"paramount":[33],"since":[34],"it":[35],"conditions":[36],"quality":[38],"results":[41,233],"obtained":[42],"by":[43,131,147,236],"data":[44,73,87,102],"mining":[45],"procedures":[46],"represents":[48],"about":[49],"80%":[50],"whole":[53],"project":[54],"time.":[55],"aims":[57],"present":[60],"study":[61,89,159],"were":[62,144],"this":[65],"provide":[68],"tools":[69],"handle":[71],"inconsistent":[72],"values.":[76,100,173],"We":[77],"have":[78],"broken":[79],"down":[80],"process":[82],"into":[83],"3":[84],"main":[85],"stages:":[86],"cleaning--explanatory":[88],"values--choice":[92],"procedure":[95],"used":[96],"for":[97],"stage":[104],"was":[105,129,160],"based":[106],"a":[108,163],"system":[109,175],"logical":[111,177],"rules":[112,178],"correct":[114,181],"mistakes":[115,182],"cluster":[118],"analysis":[119,193],"discard":[121],"poorly":[123,199],"filled":[124,200],"files.":[125,201],"missing-data":[127,206],"mechanism":[128,207],"analyzed":[130],"means":[132],"multivariate":[134,203],"statistical":[135],"procedures.":[136],"Two":[137],"methods":[138],"deal":[140],"with":[141,215],"values":[143,220],"compared:":[145],"imputation":[146,154,227,235],"most":[149],"common":[150],"value":[151],"(mode)":[152],"using":[155,228],"decision":[156,229],"trees.":[157],"This":[158],"performed":[161],"medical":[165],"diabetes":[166],"database":[167],"(23,601":[168],"patients)":[169],"including":[170],"numerous":[171],"A":[174],"allowed":[179,194],"essential":[184],"parameters":[185],"(for":[186],"example,":[187],"type":[189],"diabetes).":[191],"Cluster":[192],"identify":[196],"10%":[197],"After":[202],"analysis,":[204],"could":[208],"considered":[210],"as":[211],"random.":[212],"For":[213],"variables":[214],"low":[216],"number":[217],"(<":[221,225],"10%)":[222],"categories":[224],"4),":[226],"trees":[230],"provided":[231],"better":[232],"than":[234],"mode.":[237]},"counts_by_year":[{"year":2023,"cited_by_count":1},{"year":2021,"cited_by_count":4},{"year":2020,"cited_by_count":1},{"year":2019,"cited_by_count":3},{"year":2018,"cited_by_count":2},{"year":2017,"cited_by_count":2},{"year":2016,"cited_by_count":1},{"year":2015,"cited_by_count":2},{"year":2012,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2016-06-24T00:00:00"}
