{"id":"https://openalex.org/W1978569543","doi":"https://doi.org/10.1021/ci0001177","title":"QSAR with Few Compounds and Many Features","display_name":"QSAR with Few Compounds and Many Features","publication_year":2001,"publication_date":"2001-01-12","ids":{"openalex":"https://openalex.org/W1978569543","doi":"https://doi.org/10.1021/ci0001177","mag":"1978569543","pmid":"https://pubmed.ncbi.nlm.nih.gov/11410044"},"language":"en","primary_location":{"id":"doi:10.1021/ci0001177","is_oa":false,"landing_page_url":"https://doi.org/10.1021/ci0001177","pdf_url":null,"source":{"id":"https://openalex.org/S171559003","display_name":"Journal of Chemical Information and Computer Sciences","issn_l":"0095-2338","issn":["0095-2338","1520-5142"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310320006","host_organization_name":"American Chemical Society","host_organization_lineage":["https://openalex.org/P4310320006"],"host_organization_lineage_names":["American Chemical Society"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Journal of Chemical Information and Computer Sciences","raw_type":"journal-article"},"type":"article","indexed_in":["crossref","pubmed"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5011224904","display_name":"Douglas M. Hawkins","orcid":"https://orcid.org/0000-0002-5983-921X"},"institutions":[{"id":"https://openalex.org/I1322780083","display_name":"Minnesota Department of Natural Resources","ror":"https://ror.org/056vcnr65","country_code":"US","type":"government","lineage":["https://openalex.org/I1322780083"]},{"id":"https://openalex.org/I4210115145","display_name":"University of Minnesota, Duluth","ror":"https://ror.org/01hy4qx27","country_code":"US","type":"education","lineage":["https://openalex.org/I4210115145"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Douglas M. Hawkins","raw_affiliation_strings":["School of Statistics, 313 Ford Hall, 224 Church Street S. E., University of Minnesota, Minneapolis, Minnesota 55455, and Natural Resources Research Institute, 5013 Miller Trunk Highway, University of Minnesota, Duluth, Minnesota 55811"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"School of Statistics, 313 Ford Hall, 224 Church Street S. E., University of Minnesota, Minneapolis, Minnesota 55455, and Natural Resources Research Institute, 5013 Miller Trunk Highway, University of Minnesota, Duluth, Minnesota 55811","institution_ids":["https://openalex.org/I1322780083","https://openalex.org/I4210115145"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5036559500","display_name":"Subhash C. Basak","orcid":"https://orcid.org/0000-0002-2086-5867"},"institutions":[{"id":"https://openalex.org/I1322780083","display_name":"Minnesota Department of Natural Resources","ror":"https://ror.org/056vcnr65","country_code":"US","type":"government","lineage":["https://openalex.org/I1322780083"]},{"id":"https://openalex.org/I4210115145","display_name":"University of Minnesota, Duluth","ror":"https://ror.org/01hy4qx27","country_code":"US","type":"education","lineage":["https://openalex.org/I4210115145"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Subhash C. Basak","raw_affiliation_strings":["School of Statistics, 313 Ford Hall, 224 Church Street S. E., University of Minnesota, Minneapolis, Minnesota 55455, and Natural Resources Research Institute, 5013 Miller Trunk Highway, University of Minnesota, Duluth, Minnesota 55811"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"School of Statistics, 313 Ford Hall, 224 Church Street S. E., University of Minnesota, Minneapolis, Minnesota 55455, and Natural Resources Research Institute, 5013 Miller Trunk Highway, University of Minnesota, Duluth, Minnesota 55811","institution_ids":["https://openalex.org/I1322780083","https://openalex.org/I4210115145"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5109946878","display_name":"Xiaofang Shi","orcid":null},"institutions":[{"id":"https://openalex.org/I1322780083","display_name":"Minnesota Department of Natural Resources","ror":"https://ror.org/056vcnr65","country_code":"US","type":"government","lineage":["https://openalex.org/I1322780083"]},{"id":"https://openalex.org/I4210115145","display_name":"University of Minnesota, Duluth","ror":"https://ror.org/01hy4qx27","country_code":"US","type":"education","lineage":["https://openalex.org/I4210115145"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Xiaofang Shi","raw_affiliation_strings":["School of Statistics, 313 Ford Hall, 224 Church Street S. E., University of Minnesota, Minneapolis, Minnesota 55455, and Natural Resources Research Institute, 5013 Miller Trunk Highway, University of Minnesota, Duluth, Minnesota 55811"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"School of Statistics, 313 Ford Hall, 224 Church Street S. E., University of Minnesota, Minneapolis, Minnesota 55455, and Natural Resources Research Institute, 5013 Miller Trunk Highway, University of Minnesota, Duluth, Minnesota 55811","institution_ids":["https://openalex.org/I1322780083","https://openalex.org/I4210115145"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5036559500"],"corresponding_institution_ids":["https://openalex.org/I1322780083","https://openalex.org/I4210115145"],"apc_list":null,"apc_paid":null,"fwci":6.4116,"has_fulltext":false,"cited_by_count":77,"citation_normalized_percentile":{"value":0.96681107,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":90,"max":98},"biblio":{"volume":"41","issue":"3","first_page":"663","last_page":"670"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10211","display_name":"Computational Drug Discovery Methods","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1703","display_name":"Computational Theory and Mathematics"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10211","display_name":"Computational Drug Discovery Methods","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1703","display_name":"Computational Theory and Mathematics"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10640","display_name":"Spectroscopy and Chemometric Analyses","score":0.9987999796867371,"subfield":{"id":"https://openalex.org/subfields/1602","display_name":"Analytical Chemistry"},"field":{"id":"https://openalex.org/fields/16","display_name":"Chemistry"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10908","display_name":"Analytical Chemistry and Chromatography","score":0.9887999892234802,"subfield":{"id":"https://openalex.org/subfields/1607","display_name":"Spectroscopy"},"field":{"id":"https://openalex.org/fields/16","display_name":"Chemistry"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/quantitative-structure\u2013activity-relationship","display_name":"Quantitative structure\u2013activity relationship","score":0.8305816650390625},{"id":"https://openalex.org/keywords/feature-selection","display_name":"Feature selection","score":0.7357519865036011},{"id":"https://openalex.org/keywords/regression","display_name":"Regression","score":0.6173008680343628},{"id":"https://openalex.org/keywords/principal-component-regression","display_name":"Principal component regression","score":0.6120116710662842},{"id":"https://openalex.org/keywords/feature","display_name":"Feature (linguistics)","score":0.5852794051170349},{"id":"https://openalex.org/keywords/ridge","display_name":"Ridge","score":0.5728600025177002},{"id":"https://openalex.org/keywords/principal-component-analysis","display_name":"Principal component analysis","score":0.5523663759231567},{"id":"https://openalex.org/keywords/underdetermined-system","display_name":"Underdetermined system","score":0.5246137380599976},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.5159914493560791},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5072514414787292},{"id":"https://openalex.org/keywords/selection","display_name":"Selection (genetic algorithm)","score":0.4667494595050812},{"id":"https://openalex.org/keywords/molecular-descriptor","display_name":"Molecular descriptor","score":0.4565865397453308},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.44891175627708435},{"id":"https://openalex.org/keywords/linear-regression","display_name":"Linear regression","score":0.44674596190452576},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.44577479362487793},{"id":"https://openalex.org/keywords/regression-analysis","display_name":"Regression analysis","score":0.4414491355419159},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.39790230989456177},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.3400897979736328},{"id":"https://openalex.org/keywords/statistics","display_name":"Statistics","score":0.30847781896591187},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.23816931247711182},{"id":"https://openalex.org/keywords/biology","display_name":"Biology","score":0.07159039378166199}],"concepts":[{"id":"https://openalex.org/C164126121","wikidata":"https://www.wikidata.org/wiki/Q766383","display_name":"Quantitative structure\u2013activity relationship","level":2,"score":0.8305816650390625},{"id":"https://openalex.org/C148483581","wikidata":"https://www.wikidata.org/wiki/Q446488","display_name":"Feature selection","level":2,"score":0.7357519865036011},{"id":"https://openalex.org/C83546350","wikidata":"https://www.wikidata.org/wiki/Q1139051","display_name":"Regression","level":2,"score":0.6173008680343628},{"id":"https://openalex.org/C74887250","wikidata":"https://www.wikidata.org/wiki/Q3455892","display_name":"Principal component regression","level":3,"score":0.6120116710662842},{"id":"https://openalex.org/C2776401178","wikidata":"https://www.wikidata.org/wiki/Q12050496","display_name":"Feature (linguistics)","level":2,"score":0.5852794051170349},{"id":"https://openalex.org/C32277403","wikidata":"https://www.wikidata.org/wiki/Q740445","display_name":"Ridge","level":2,"score":0.5728600025177002},{"id":"https://openalex.org/C27438332","wikidata":"https://www.wikidata.org/wiki/Q2873","display_name":"Principal component analysis","level":2,"score":0.5523663759231567},{"id":"https://openalex.org/C179690561","wikidata":"https://www.wikidata.org/wiki/Q4316110","display_name":"Underdetermined system","level":2,"score":0.5246137380599976},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.5159914493560791},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5072514414787292},{"id":"https://openalex.org/C81917197","wikidata":"https://www.wikidata.org/wiki/Q628760","display_name":"Selection (genetic algorithm)","level":2,"score":0.4667494595050812},{"id":"https://openalex.org/C164923092","wikidata":"https://www.wikidata.org/wiki/Q3705921","display_name":"Molecular descriptor","level":3,"score":0.4565865397453308},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.44891175627708435},{"id":"https://openalex.org/C48921125","wikidata":"https://www.wikidata.org/wiki/Q10861030","display_name":"Linear regression","level":2,"score":0.44674596190452576},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.44577479362487793},{"id":"https://openalex.org/C152877465","wikidata":"https://www.wikidata.org/wiki/Q208042","display_name":"Regression analysis","level":2,"score":0.4414491355419159},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.39790230989456177},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.3400897979736328},{"id":"https://openalex.org/C105795698","wikidata":"https://www.wikidata.org/wiki/Q12483","display_name":"Statistics","level":1,"score":0.30847781896591187},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.23816931247711182},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.07159039378166199},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.0},{"id":"https://openalex.org/C151730666","wikidata":"https://www.wikidata.org/wiki/Q7205","display_name":"Paleontology","level":1,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0}],"mesh":[{"descriptor_ui":"D000465","descriptor_name":"Algorithms","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D000465","descriptor_name":"Algorithms","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D000465","descriptor_name":"Algorithms","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D015203","descriptor_name":"Reproducibility of Results","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D015203","descriptor_name":"Reproducibility of Results","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D015203","descriptor_name":"Reproducibility of Results","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D015233","descriptor_name":"Models, Statistical","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D015233","descriptor_name":"Models, Statistical","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D015233","descriptor_name":"Models, Statistical","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D021281","descriptor_name":"Quantitative Structure-Activity Relationship","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D021281","descriptor_name":"Quantitative Structure-Activity Relationship","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D021281","descriptor_name":"Quantitative Structure-Activity Relationship","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true}],"locations_count":3,"locations":[{"id":"doi:10.1021/ci0001177","is_oa":false,"landing_page_url":"https://doi.org/10.1021/ci0001177","pdf_url":null,"source":{"id":"https://openalex.org/S171559003","display_name":"Journal of Chemical Information and Computer Sciences","issn_l":"0095-2338","issn":["0095-2338","1520-5142"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310320006","host_organization_name":"American Chemical Society","host_organization_lineage":["https://openalex.org/P4310320006"],"host_organization_lineage_names":["American Chemical Society"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Journal of Chemical Information and Computer Sciences","raw_type":"journal-article"},{"id":"pmid:11410044","is_oa":false,"landing_page_url":"https://pubmed.ncbi.nlm.nih.gov/11410044","pdf_url":null,"source":{"id":"https://openalex.org/S4306525036","display_name":"PubMed","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1299303238","host_organization_name":"National Institutes of Health","host_organization_lineage":["https://openalex.org/I1299303238"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Journal of chemical information and computer sciences","raw_type":null},{"id":"pmh:oai:CiteSeerX.psu:10.1.1.107.6965","is_oa":false,"landing_page_url":"http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.107.6965","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"http://www.samsi.info/200304/dmml/web-internal/sfd/Haw2001QSAR.pdf","raw_type":"text"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[{"id":"https://openalex.org/F4320306107","display_name":"U.S. Environmental Protection Agency","ror":"https://ror.org/03tns0030"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":28,"referenced_works":["https://openalex.org/W48332454","https://openalex.org/W623814603","https://openalex.org/W1528905581","https://openalex.org/W1966089218","https://openalex.org/W1990381576","https://openalex.org/W1991980833","https://openalex.org/W2021276449","https://openalex.org/W2032923748","https://openalex.org/W2037261712","https://openalex.org/W2045294557","https://openalex.org/W2047028564","https://openalex.org/W2049496090","https://openalex.org/W2049775299","https://openalex.org/W2060294938","https://openalex.org/W2066772060","https://openalex.org/W2072872824","https://openalex.org/W2075356592","https://openalex.org/W2079775628","https://openalex.org/W2086920981","https://openalex.org/W2091982937","https://openalex.org/W2124181495","https://openalex.org/W2126999940","https://openalex.org/W2898519191","https://openalex.org/W3026285062","https://openalex.org/W3123648075","https://openalex.org/W4210310779","https://openalex.org/W4246393709","https://openalex.org/W4249303080"],"related_works":["https://openalex.org/W2148009427","https://openalex.org/W1518515965","https://openalex.org/W1978931025","https://openalex.org/W3159291941","https://openalex.org/W2063120953","https://openalex.org/W2753230657","https://openalex.org/W2165637039","https://openalex.org/W2026843845","https://openalex.org/W2889092195","https://openalex.org/W2155827364"],"abstract_inverted_index":{"Fitting":[0],"quantitative":[1],"structure-activity":[2],"relationships":[3],"(QSAR)":[4],"requires":[5],"different":[6],"statistical":[7],"methodologies":[8],"and,":[9],"to":[10,108,127],"some":[11],"degree,":[12],"philosophies":[13],"depending":[14],"on":[15,84],"the":[16,19,101,110,137],"\"shape\"":[17],"of":[18,97,142],"data":[20,144],"matrix.":[21],"When":[22],"few":[23,61],"features":[24,59],"are":[25,29,57],"used":[26,124],"and":[27,45,48,53,60,73,82,90,113,130],"there":[28,56],"many":[30,58],"compounds,":[31,62],"it":[32],"is":[33,64],"a":[34,95],"reasonable":[35],"expectation":[36],"that":[37,46],"good":[38,88],"feature":[39,80],"subset":[40],"selection":[41,81],"may":[42,86],"be":[43,51,123],"made":[44],"nonlinearities":[47,129],"nonadditivities":[49],"can":[50,122],"detected":[52],"diagnosed.":[54],"Where":[55],"this":[63],"unrealistic.":[65],"Methods":[66],"such":[67],"as":[68],"ridge":[69,98,111],"regression":[70,76,99,120],"RR,":[71],"PLS,":[72],"principal":[74],"component":[75],"PCR,":[77],"which":[78],"abjure":[79],"rely":[83],"linearity":[85],"provide":[87],"predictions":[89],"fair":[91],"understanding.":[92],"We":[93,135],"report":[94],"development":[96],"for":[100,116],"underdetermined":[102],"case":[103],"by":[104],"using":[105,146],"generalized":[106],"cross-validation":[107],"choose":[109],"constant":[112],"perform":[114],"F-tests":[115],"additional":[117],"information.":[118],"Conventional":[119],"diagnostics":[121],"in":[125],"followup":[126],"identify":[128],"other":[131],"departures":[132],"from":[133],"model.":[134],"illustrate":[136],"approach":[138],"with":[139],"QSAR":[140],"models":[141],"four":[143],"sets":[145],"calculated":[147],"molecular":[148],"descriptors.":[149]},"counts_by_year":[{"year":2025,"cited_by_count":1},{"year":2024,"cited_by_count":3},{"year":2023,"cited_by_count":6},{"year":2022,"cited_by_count":1},{"year":2021,"cited_by_count":2},{"year":2020,"cited_by_count":6},{"year":2019,"cited_by_count":3},{"year":2018,"cited_by_count":1},{"year":2017,"cited_by_count":1},{"year":2016,"cited_by_count":2},{"year":2015,"cited_by_count":1},{"year":2014,"cited_by_count":2},{"year":2013,"cited_by_count":1},{"year":2012,"cited_by_count":3}],"updated_date":"2026-06-16T07:32:37.131356","created_date":"2025-10-10T00:00:00"}
