{"id":"https://openalex.org/W2015928143","doi":"https://doi.org/10.1021/ci025626i","title":"Assessing Model Fit by Cross-Validation","display_name":"Assessing Model Fit by Cross-Validation","publication_year":2003,"publication_date":"2003-01-24","ids":{"openalex":"https://openalex.org/W2015928143","doi":"https://doi.org/10.1021/ci025626i","mag":"2015928143","pmid":"https://pubmed.ncbi.nlm.nih.gov/12653524"},"language":"en","primary_location":{"id":"doi:10.1021/ci025626i","is_oa":false,"landing_page_url":"https://doi.org/10.1021/ci025626i","pdf_url":null,"source":{"id":"https://openalex.org/S171559003","display_name":"Journal of Chemical Information and Computer Sciences","issn_l":"0095-2338","issn":["0095-2338","1520-5142"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310320006","host_organization_name":"American Chemical Society","host_organization_lineage":["https://openalex.org/P4310320006"],"host_organization_lineage_names":["American Chemical Society"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Journal of Chemical Information and Computer Sciences","raw_type":"journal-article"},"type":"article","indexed_in":["crossref","pubmed"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5011224904","display_name":"Douglas M. Hawkins","orcid":"https://orcid.org/0000-0002-5983-921X"},"institutions":[{"id":"https://openalex.org/I1322780083","display_name":"Minnesota Department of Natural Resources","ror":"https://ror.org/056vcnr65","country_code":"US","type":"government","lineage":["https://openalex.org/I1322780083"]},{"id":"https://openalex.org/I4210115145","display_name":"University of Minnesota, Duluth","ror":"https://ror.org/01hy4qx27","country_code":"US","type":"education","lineage":["https://openalex.org/I4210115145"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Douglas M. Hawkins","raw_affiliation_strings":["School of Statistics, University of Minnesota, Minneapolis, Minnesota 55455, and Natural Resources Research Institute, University of MinnesotaDuluth, 5013 Miller Trunk Highway, Duluth, Minnesota 55811"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"School of Statistics, University of Minnesota, Minneapolis, Minnesota 55455, and Natural Resources Research Institute, University of MinnesotaDuluth, 5013 Miller Trunk Highway, Duluth, Minnesota 55811","institution_ids":["https://openalex.org/I1322780083","https://openalex.org/I4210115145"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5036559500","display_name":"Subhash C. Basak","orcid":"https://orcid.org/0000-0002-2086-5867"},"institutions":[{"id":"https://openalex.org/I1322780083","display_name":"Minnesota Department of Natural Resources","ror":"https://ror.org/056vcnr65","country_code":"US","type":"government","lineage":["https://openalex.org/I1322780083"]},{"id":"https://openalex.org/I4210115145","display_name":"University of Minnesota, Duluth","ror":"https://ror.org/01hy4qx27","country_code":"US","type":"education","lineage":["https://openalex.org/I4210115145"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Subhash C. Basak","raw_affiliation_strings":["School of Statistics, University of Minnesota, Minneapolis, Minnesota 55455, and Natural Resources Research Institute, University of MinnesotaDuluth, 5013 Miller Trunk Highway, Duluth, Minnesota 55811"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"School of Statistics, University of Minnesota, Minneapolis, Minnesota 55455, and Natural Resources Research Institute, University of MinnesotaDuluth, 5013 Miller Trunk Highway, Duluth, Minnesota 55811","institution_ids":["https://openalex.org/I1322780083","https://openalex.org/I4210115145"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5056024291","display_name":"Denise Mills","orcid":null},"institutions":[{"id":"https://openalex.org/I1322780083","display_name":"Minnesota Department of Natural Resources","ror":"https://ror.org/056vcnr65","country_code":"US","type":"government","lineage":["https://openalex.org/I1322780083"]},{"id":"https://openalex.org/I4210115145","display_name":"University of Minnesota, Duluth","ror":"https://ror.org/01hy4qx27","country_code":"US","type":"education","lineage":["https://openalex.org/I4210115145"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Denise Mills","raw_affiliation_strings":["School of Statistics, University of Minnesota, Minneapolis, Minnesota 55455, and Natural Resources Research Institute, University of MinnesotaDuluth, 5013 Miller Trunk Highway, Duluth, Minnesota 55811"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"School of Statistics, University of Minnesota, Minneapolis, Minnesota 55455, and Natural Resources Research Institute, University of MinnesotaDuluth, 5013 Miller Trunk Highway, Duluth, Minnesota 55811","institution_ids":["https://openalex.org/I1322780083","https://openalex.org/I4210115145"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":23.2962,"has_fulltext":false,"cited_by_count":762,"citation_normalized_percentile":{"value":0.99749133,"is_in_top_1_percent":true,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":99,"max":100},"biblio":{"volume":"43","issue":"2","first_page":"579","last_page":"586"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10211","display_name":"Computational Drug Discovery Methods","score":0.9995999932289124,"subfield":{"id":"https://openalex.org/subfields/1703","display_name":"Computational Theory and Mathematics"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10211","display_name":"Computational Drug Discovery Methods","score":0.9995999932289124,"subfield":{"id":"https://openalex.org/subfields/1703","display_name":"Computational Theory and Mathematics"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11948","display_name":"Machine Learning in Materials Science","score":0.9914000034332275,"subfield":{"id":"https://openalex.org/subfields/2505","display_name":"Materials Chemistry"},"field":{"id":"https://openalex.org/fields/25","display_name":"Materials Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10836","display_name":"Metabolomics and Mass Spectrometry Studies","score":0.9498999714851379,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/cross-validation","display_name":"Cross-validation","score":0.7197271585464478},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6255716681480408},{"id":"https://openalex.org/keywords/model-validation","display_name":"Model validation","score":0.5965222120285034},{"id":"https://openalex.org/keywords/set","display_name":"Set (abstract data type)","score":0.5905940532684326},{"id":"https://openalex.org/keywords/sample","display_name":"Sample (material)","score":0.5361684560775757},{"id":"https://openalex.org/keywords/quantitative-structure\u2013activity-relationship","display_name":"Quantitative structure\u2013activity relationship","score":0.5158869028091431},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.4527178704738617},{"id":"https://openalex.org/keywords/test","display_name":"Test (biology)","score":0.43097931146621704},{"id":"https://openalex.org/keywords/sample-size-determination","display_name":"Sample size determination","score":0.4229260981082916},{"id":"https://openalex.org/keywords/data-set","display_name":"Data set","score":0.41229188442230225},{"id":"https://openalex.org/keywords/statistics","display_name":"Statistics","score":0.34112322330474854},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.31853628158569336},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.2752651572227478},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.24132347106933594},{"id":"https://openalex.org/keywords/data-science","display_name":"Data science","score":0.08232313394546509}],"concepts":[{"id":"https://openalex.org/C27181475","wikidata":"https://www.wikidata.org/wiki/Q541014","display_name":"Cross-validation","level":2,"score":0.7197271585464478},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6255716681480408},{"id":"https://openalex.org/C3019813237","wikidata":"https://www.wikidata.org/wiki/Q65089264","display_name":"Model validation","level":2,"score":0.5965222120285034},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.5905940532684326},{"id":"https://openalex.org/C198531522","wikidata":"https://www.wikidata.org/wiki/Q485146","display_name":"Sample (material)","level":2,"score":0.5361684560775757},{"id":"https://openalex.org/C164126121","wikidata":"https://www.wikidata.org/wiki/Q766383","display_name":"Quantitative structure\u2013activity relationship","level":2,"score":0.5158869028091431},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.4527178704738617},{"id":"https://openalex.org/C2777267654","wikidata":"https://www.wikidata.org/wiki/Q3519023","display_name":"Test (biology)","level":2,"score":0.43097931146621704},{"id":"https://openalex.org/C129848803","wikidata":"https://www.wikidata.org/wiki/Q2564360","display_name":"Sample size determination","level":2,"score":0.4229260981082916},{"id":"https://openalex.org/C58489278","wikidata":"https://www.wikidata.org/wiki/Q1172284","display_name":"Data set","level":2,"score":0.41229188442230225},{"id":"https://openalex.org/C105795698","wikidata":"https://www.wikidata.org/wiki/Q12483","display_name":"Statistics","level":1,"score":0.34112322330474854},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.31853628158569336},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.2752651572227478},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.24132347106933594},{"id":"https://openalex.org/C2522767166","wikidata":"https://www.wikidata.org/wiki/Q2374463","display_name":"Data science","level":1,"score":0.08232313394546509},{"id":"https://openalex.org/C185592680","wikidata":"https://www.wikidata.org/wiki/Q2329","display_name":"Chemistry","level":0,"score":0.0},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0},{"id":"https://openalex.org/C151730666","wikidata":"https://www.wikidata.org/wiki/Q7205","display_name":"Paleontology","level":1,"score":0.0},{"id":"https://openalex.org/C43617362","wikidata":"https://www.wikidata.org/wiki/Q170050","display_name":"Chromatography","level":1,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1021/ci025626i","is_oa":false,"landing_page_url":"https://doi.org/10.1021/ci025626i","pdf_url":null,"source":{"id":"https://openalex.org/S171559003","display_name":"Journal of Chemical Information and Computer Sciences","issn_l":"0095-2338","issn":["0095-2338","1520-5142"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310320006","host_organization_name":"American Chemical Society","host_organization_lineage":["https://openalex.org/P4310320006"],"host_organization_lineage_names":["American Chemical Society"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Journal of Chemical Information and Computer Sciences","raw_type":"journal-article"},{"id":"pmid:12653524","is_oa":false,"landing_page_url":"https://pubmed.ncbi.nlm.nih.gov/12653524","pdf_url":null,"source":{"id":"https://openalex.org/S4306525036","display_name":"PubMed","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1299303238","host_organization_name":"National Institutes of Health","host_organization_lineage":["https://openalex.org/I1299303238"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Journal of chemical information and computer sciences","raw_type":null}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[{"id":"https://openalex.org/F4320332467","display_name":"U.S. Air Force","ror":"https://ror.org/006gmme17"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":21,"referenced_works":["https://openalex.org/W25566965","https://openalex.org/W1531123763","https://openalex.org/W1978569543","https://openalex.org/W1981251392","https://openalex.org/W1990381576","https://openalex.org/W1998797952","https://openalex.org/W2002678574","https://openalex.org/W2033872649","https://openalex.org/W2033914841","https://openalex.org/W2047104567","https://openalex.org/W2049216662","https://openalex.org/W2049496090","https://openalex.org/W2049775299","https://openalex.org/W2050297026","https://openalex.org/W2056075852","https://openalex.org/W2057192556","https://openalex.org/W2063261057","https://openalex.org/W2079100340","https://openalex.org/W2079775628","https://openalex.org/W2087661061","https://openalex.org/W2124181495"],"related_works":["https://openalex.org/W816105089","https://openalex.org/W2100523380","https://openalex.org/W4318240167","https://openalex.org/W3048572280","https://openalex.org/W2019765489","https://openalex.org/W4253742790","https://openalex.org/W2023130417","https://openalex.org/W3011444647","https://openalex.org/W2514173981","https://openalex.org/W4254524906"],"abstract_inverted_index":{"When":[0],"QSAR":[1,86],"models":[2],"are":[3,35],"fitted,":[4],"it":[5,15,109,117],"is":[6,16,63,95,113,118,128],"important":[7],"to":[8,24,66,71,121],"validate":[9],"any":[10],"fitted":[11],"model-to":[12],"check":[13],"that":[14,18,89,116,126],"plausible":[17],"its":[19,73],"predictions":[20],"will":[21],"carry":[22],"over":[23],"fresh":[25],"data":[26,87],"not":[27],"used":[28,64],"in":[29,55],"the":[30,48,57,68,91,97,103],"model":[31,69],"fitting":[32],"exercise.":[33],"There":[34],"two":[36],"standard":[37],"ways":[38],"of":[39,60,83,108],"doing":[40],"this-using":[41],"a":[42,84,106],"separate":[43],"hold-out":[44],"test":[45],"sample":[46,93],"and":[47,70,80,115],"computationally":[49],"much":[50,119],"more":[51],"burdensome":[52],"leave-one-out":[53],"cross-validation":[54],"which":[56],"entire":[58],"pool":[59],"available":[61,92],"compounds":[62],"both":[65],"fit":[67],"assess":[72],"validity.":[74],"We":[75],"show":[76],"by":[77],"theoretical":[78],"argument":[79],"empiric":[81],"study":[82],"large":[85],"set":[88],"when":[90],"size":[94],"small-in":[96],"dozens":[98],"or":[99],"scores":[100],"rather":[101],"than":[102],"hundreds,":[104],"holding":[105],"portion":[107],"back":[110],"for":[111],"testing":[112],"wasteful,":[114],"better":[120],"use":[122],"cross-validation,":[123],"but":[124],"ensure":[125],"this":[127],"done":[129],"properly.":[130]},"counts_by_year":[{"year":2026,"cited_by_count":14},{"year":2025,"cited_by_count":36},{"year":2024,"cited_by_count":33},{"year":2023,"cited_by_count":36},{"year":2022,"cited_by_count":47},{"year":2021,"cited_by_count":56},{"year":2020,"cited_by_count":34},{"year":2019,"cited_by_count":39},{"year":2018,"cited_by_count":42},{"year":2017,"cited_by_count":30},{"year":2016,"cited_by_count":34},{"year":2015,"cited_by_count":44},{"year":2014,"cited_by_count":29},{"year":2013,"cited_by_count":33},{"year":2012,"cited_by_count":35}],"updated_date":"2026-06-18T10:00:31.954636","created_date":"2025-10-10T00:00:00"}
