{"id":"https://openalex.org/W2084306359","doi":"https://doi.org/10.1021/ci100022u","title":"Classifying Large Chemical Data Sets: Using A Regularized Potential Function Method","display_name":"Classifying Large Chemical Data Sets: Using A Regularized Potential Function Method","publication_year":2010,"publication_date":"2010-12-15","ids":{"openalex":"https://openalex.org/W2084306359","doi":"https://doi.org/10.1021/ci100022u","mag":"2084306359","pmid":"https://pubmed.ncbi.nlm.nih.gov/21155612"},"language":"en","primary_location":{"id":"doi:10.1021/ci100022u","is_oa":false,"landing_page_url":"https://doi.org/10.1021/ci100022u","pdf_url":null,"source":{"id":"https://openalex.org/S167262187","display_name":"Journal of Chemical Information and Modeling","issn_l":"1549-9596","issn":["1549-9596","1549-960X"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310320006","host_organization_name":"American Chemical Society","host_organization_lineage":["https://openalex.org/P4310320006"],"host_organization_lineage_names":["American Chemical Society"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Journal of Chemical Information and Modeling","raw_type":"journal-article"},"type":"article","indexed_in":["crossref","pubmed"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5001996295","display_name":"Hamse Y. Mussa","orcid":null},"institutions":[{"id":"https://openalex.org/I241749","display_name":"University of Cambridge","ror":"https://ror.org/013meh722","country_code":"GB","type":"education","lineage":["https://openalex.org/I241749"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Hamse Y. Mussa","raw_affiliation_strings":["Unilever Centre for Molecular Sciences Informatics, Department of Chemistry, University of Cambridge, Lensfield Road, Cambridge CB2 1EW, United Kingdom and CPC, LFP, Lead Discovery Informatics, Novartis Institutes for BioMedical Research, Inc., 250 Massachusetts Avenue Cambridge, Massachusetts 02139, United States"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Unilever Centre for Molecular Sciences Informatics, Department of Chemistry, University of Cambridge, Lensfield Road, Cambridge CB2 1EW, United Kingdom and CPC, LFP, Lead Discovery Informatics, Novartis Institutes for BioMedical Research, Inc., 250 Massachusetts Avenue Cambridge, Massachusetts 02139, United States","institution_ids":["https://openalex.org/I241749"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5012357306","display_name":"Lezan Hawizy","orcid":"https://orcid.org/0000-0002-0932-3135"},"institutions":[{"id":"https://openalex.org/I241749","display_name":"University of Cambridge","ror":"https://ror.org/013meh722","country_code":"GB","type":"education","lineage":["https://openalex.org/I241749"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Lezan Hawizy","raw_affiliation_strings":["Unilever Centre for Molecular Sciences Informatics, Department of Chemistry, University of Cambridge, Lensfield Road, Cambridge CB2 1EW, United Kingdom and CPC, LFP, Lead Discovery Informatics, Novartis Institutes for BioMedical Research, Inc., 250 Massachusetts Avenue Cambridge, Massachusetts 02139, United States"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Unilever Centre for Molecular Sciences Informatics, Department of Chemistry, University of Cambridge, Lensfield Road, Cambridge CB2 1EW, United Kingdom and CPC, LFP, Lead Discovery Informatics, Novartis Institutes for BioMedical Research, Inc., 250 Massachusetts Avenue Cambridge, Massachusetts 02139, United States","institution_ids":["https://openalex.org/I241749"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5059360993","display_name":"Florian Nigsch","orcid":"https://orcid.org/0000-0002-2919-8749"},"institutions":[{"id":"https://openalex.org/I241749","display_name":"University of Cambridge","ror":"https://ror.org/013meh722","country_code":"GB","type":"education","lineage":["https://openalex.org/I241749"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Florian Nigsch","raw_affiliation_strings":["Unilever Centre for Molecular Sciences Informatics, Department of Chemistry, University of Cambridge, Lensfield Road, Cambridge CB2 1EW, United Kingdom and CPC, LFP, Lead Discovery Informatics, Novartis Institutes for BioMedical Research, Inc., 250 Massachusetts Avenue Cambridge, Massachusetts 02139, United States"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Unilever Centre for Molecular Sciences Informatics, Department of Chemistry, University of Cambridge, Lensfield Road, Cambridge CB2 1EW, United Kingdom and CPC, LFP, Lead Discovery Informatics, Novartis Institutes for BioMedical Research, Inc., 250 Massachusetts Avenue Cambridge, Massachusetts 02139, United States","institution_ids":["https://openalex.org/I241749"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5014283781","display_name":"Robert C. Glen","orcid":"https://orcid.org/0000-0003-1759-2914"},"institutions":[{"id":"https://openalex.org/I241749","display_name":"University of Cambridge","ror":"https://ror.org/013meh722","country_code":"GB","type":"education","lineage":["https://openalex.org/I241749"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Robert C. Glen","raw_affiliation_strings":["Unilever Centre for Molecular Sciences Informatics, Department of Chemistry, University of Cambridge, Lensfield Road, Cambridge CB2 1EW, United Kingdom and CPC, LFP, Lead Discovery Informatics, Novartis Institutes for BioMedical Research, Inc., 250 Massachusetts Avenue Cambridge, Massachusetts 02139, United States"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Unilever Centre for Molecular Sciences Informatics, Department of Chemistry, University of Cambridge, Lensfield Road, Cambridge CB2 1EW, United Kingdom and CPC, LFP, Lead Discovery Informatics, Novartis Institutes for BioMedical Research, Inc., 250 Massachusetts Avenue Cambridge, Massachusetts 02139, United States","institution_ids":["https://openalex.org/I241749"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.5769,"has_fulltext":false,"cited_by_count":12,"citation_normalized_percentile":{"value":0.70221548,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":97},"biblio":{"volume":"51","issue":"1","first_page":"4","last_page":"14"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10640","display_name":"Spectroscopy and Chemometric Analyses","score":0.9987000226974487,"subfield":{"id":"https://openalex.org/subfields/1602","display_name":"Analytical Chemistry"},"field":{"id":"https://openalex.org/fields/16","display_name":"Chemistry"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10640","display_name":"Spectroscopy and Chemometric Analyses","score":0.9987000226974487,"subfield":{"id":"https://openalex.org/subfields/1602","display_name":"Analytical Chemistry"},"field":{"id":"https://openalex.org/fields/16","display_name":"Chemistry"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10876","display_name":"Fault Detection and Control Systems","score":0.9969000220298767,"subfield":{"id":"https://openalex.org/subfields/2207","display_name":"Control and Systems Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11324","display_name":"Spectroscopy Techniques in Biomedical and Chemical Research","score":0.9919000267982483,"subfield":{"id":"https://openalex.org/subfields/1304","display_name":"Biophysics"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/overfitting","display_name":"Overfitting","score":0.7227338552474976},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.6518365740776062},{"id":"https://openalex.org/keywords/support-vector-machine","display_name":"Support vector machine","score":0.6242423057556152},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.5913446545600891},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.5657463073730469},{"id":"https://openalex.org/keywords/perceptron","display_name":"Perceptron","score":0.5619999766349792},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.5343282222747803},{"id":"https://openalex.org/keywords/kernel","display_name":"Kernel (algebra)","score":0.5189849734306335},{"id":"https://openalex.org/keywords/softmax-function","display_name":"Softmax function","score":0.4810088574886322},{"id":"https://openalex.org/keywords/binary-classification","display_name":"Binary classification","score":0.471696138381958},{"id":"https://openalex.org/keywords/kernel-method","display_name":"Kernel method","score":0.4241951107978821},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.3521970212459564},{"id":"https://openalex.org/keywords/artificial-neural-network","display_name":"Artificial neural network","score":0.25634706020355225}],"concepts":[{"id":"https://openalex.org/C22019652","wikidata":"https://www.wikidata.org/wiki/Q331309","display_name":"Overfitting","level":3,"score":0.7227338552474976},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6518365740776062},{"id":"https://openalex.org/C12267149","wikidata":"https://www.wikidata.org/wiki/Q282453","display_name":"Support vector machine","level":2,"score":0.6242423057556152},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.5913446545600891},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.5657463073730469},{"id":"https://openalex.org/C60908668","wikidata":"https://www.wikidata.org/wiki/Q690207","display_name":"Perceptron","level":3,"score":0.5619999766349792},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.5343282222747803},{"id":"https://openalex.org/C74193536","wikidata":"https://www.wikidata.org/wiki/Q574844","display_name":"Kernel (algebra)","level":2,"score":0.5189849734306335},{"id":"https://openalex.org/C188441871","wikidata":"https://www.wikidata.org/wiki/Q7554146","display_name":"Softmax function","level":3,"score":0.4810088574886322},{"id":"https://openalex.org/C66905080","wikidata":"https://www.wikidata.org/wiki/Q17005494","display_name":"Binary classification","level":3,"score":0.471696138381958},{"id":"https://openalex.org/C122280245","wikidata":"https://www.wikidata.org/wiki/Q620622","display_name":"Kernel method","level":3,"score":0.4241951107978821},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.3521970212459564},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.25634706020355225},{"id":"https://openalex.org/C114614502","wikidata":"https://www.wikidata.org/wiki/Q76592","display_name":"Combinatorics","level":1,"score":0.0}],"mesh":[{"descriptor_ui":"D002621","descriptor_name":"Chemistry","qualifier_ui":"Q000379","qualifier_name":"methods","is_major_topic":false},{"descriptor_ui":"D002621","descriptor_name":"Chemistry","qualifier_ui":"Q000379","qualifier_name":"methods","is_major_topic":false},{"descriptor_ui":"D002621","descriptor_name":"Chemistry","qualifier_ui":"Q000379","qualifier_name":"methods","is_major_topic":false},{"descriptor_ui":"D002965","descriptor_name":"Classification","qualifier_ui":"Q000379","qualifier_name":"methods","is_major_topic":false},{"descriptor_ui":"D002965","descriptor_name":"Classification","qualifier_ui":"Q000379","qualifier_name":"methods","is_major_topic":false},{"descriptor_ui":"D002965","descriptor_name":"Classification","qualifier_ui":"Q000379","qualifier_name":"methods","is_major_topic":false},{"descriptor_ui":"D003657","descriptor_name":"Decision Making","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D003657","descriptor_name":"Decision Making","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D003657","descriptor_name":"Decision Making","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D016002","descriptor_name":"Discriminant Analysis","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D016002","descriptor_name":"Discriminant Analysis","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D016002","descriptor_name":"Discriminant Analysis","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D017711","descriptor_name":"Nonlinear Dynamics","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D017711","descriptor_name":"Nonlinear Dynamics","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D017711","descriptor_name":"Nonlinear Dynamics","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D048088","descriptor_name":"Informatics","qualifier_ui":"Q000379","qualifier_name":"methods","is_major_topic":false},{"descriptor_ui":"D048088","descriptor_name":"Informatics","qualifier_ui":"Q000379","qualifier_name":"methods","is_major_topic":false},{"descriptor_ui":"D048088","descriptor_name":"Informatics","qualifier_ui":"Q000379","qualifier_name":"methods","is_major_topic":false}],"locations_count":2,"locations":[{"id":"doi:10.1021/ci100022u","is_oa":false,"landing_page_url":"https://doi.org/10.1021/ci100022u","pdf_url":null,"source":{"id":"https://openalex.org/S167262187","display_name":"Journal of Chemical Information and Modeling","issn_l":"1549-9596","issn":["1549-9596","1549-960X"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310320006","host_organization_name":"American Chemical Society","host_organization_lineage":["https://openalex.org/P4310320006"],"host_organization_lineage_names":["American Chemical Society"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Journal of Chemical Information and Modeling","raw_type":"journal-article"},{"id":"pmid:21155612","is_oa":false,"landing_page_url":"https://pubmed.ncbi.nlm.nih.gov/21155612","pdf_url":null,"source":{"id":"https://openalex.org/S4306525036","display_name":"PubMed","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1299303238","host_organization_name":"National Institutes of Health","host_organization_lineage":["https://openalex.org/I1299303238"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Journal of chemical information and modeling","raw_type":null}],"best_oa_location":null,"sustainable_development_goals":[{"display_name":"Peace, Justice and strong institutions","id":"https://metadata.un.org/sdg/16","score":0.5299999713897705},{"display_name":"Reduced inequalities","id":"https://metadata.un.org/sdg/10","score":0.4000000059604645}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":35,"referenced_works":["https://openalex.org/W26816478","https://openalex.org/W1494575750","https://openalex.org/W1503196035","https://openalex.org/W1510073064","https://openalex.org/W1536628706","https://openalex.org/W1554104828","https://openalex.org/W1604938182","https://openalex.org/W1983610905","https://openalex.org/W1986280275","https://openalex.org/W2007154098","https://openalex.org/W2020756525","https://openalex.org/W2031248101","https://openalex.org/W2037982830","https://openalex.org/W2043076104","https://openalex.org/W2076959408","https://openalex.org/W2081123119","https://openalex.org/W2094125466","https://openalex.org/W2110652811","https://openalex.org/W2117812871","https://openalex.org/W2119821739","https://openalex.org/W2124776405","https://openalex.org/W2135346934","https://openalex.org/W2139212933","https://openalex.org/W2156512439","https://openalex.org/W2156909104","https://openalex.org/W2196714401","https://openalex.org/W2614228683","https://openalex.org/W2800394774","https://openalex.org/W2911964244","https://openalex.org/W3101749733","https://openalex.org/W4212863985","https://openalex.org/W4235395760","https://openalex.org/W4249572517","https://openalex.org/W4252684946","https://openalex.org/W4298876635"],"related_works":["https://openalex.org/W4362597605","https://openalex.org/W1574414179","https://openalex.org/W3107204728","https://openalex.org/W2150029999","https://openalex.org/W2988321605","https://openalex.org/W3099518927","https://openalex.org/W3162172987","https://openalex.org/W2830281438","https://openalex.org/W3027275906","https://openalex.org/W6717794"],"abstract_inverted_index":{"In":[0,152],"recent":[1],"years":[2],"classifiers":[3,46,177,186,218],"generated":[4],"with":[5,118,169,222],"kernel-based":[6,45,70,78,88,191],"methods,":[7],"such":[8],"as":[9,68,183,185],"support":[10],"vector":[11],"machines":[12],"(SVM),":[13],"Gaussian":[14],"processes":[15],"(GP),":[16],"regularization":[17,172],"networks":[18],"(RN),":[19],"and":[20,73,96,104,112,122,142,233],"binary":[21,176,205],"kernel":[22],"discrimination":[23],"(BKD)":[24],"have":[25],"been":[26,147],"very":[27],"popular":[28],"in":[29,47,134,150,181],"chemoinformatics":[30],"data":[31,211],"analysis.":[32],"Aizerman":[33],"et":[34],"al.":[35],"were":[36,219],"the":[37,41,48,59,76,101,137,198,213,223],"first":[38],"to":[39,98,125,203],"introduce":[40],"notion":[42],"of":[43,50,216,226],"employing":[44,189],"area":[49],"pattern":[51],"recognition.":[52],"Their":[53],"original":[54],"scheme,":[55],"which":[56],"they":[57],"termed":[58],"potential":[60],"function":[61],"method":[62,144],"(PFM),":[63],"can":[64,81,179],"basically":[65],"be":[66,82],"viewed":[67],"a":[69,170,194,208],"perceptron":[71],"procedure":[72],"arguably":[74],"subsumes":[75],"modern":[77,87],"algorithms.":[79,106],"PFM":[80,91,114,167,200,217],"computationally":[83],"much":[84],"cheaper":[85],"than":[86,100],"classifiers;":[89],"furthermore,":[90],"is":[92,115],"far":[93],"simpler":[94],"conceptually":[95],"easier":[97],"implement":[99],"SVM,":[102,110],"GP,":[103,111],"RN":[105],"Unfortunately,":[107],"unlike,":[108],"e.g.,":[109],"RN,":[113],"not":[116,146],"endowed":[117],"both":[119],"theoretical":[120],"guarantees":[121],"practical":[123],"strategies":[124],"safeguard":[126],"it":[127],"against":[128],"generating":[129],"overfitting":[130],"classifiers.":[131,206,235],"This":[132],"is,":[133],"our":[135],"opinion,":[136],"reason":[138],"why":[139],"this":[140,153,158],"simple":[141,171],"elegant":[143],"has":[145],"taken":[148],"up":[149],"chemoinformatics.":[151],"paper":[154],"we":[155,164],"empirically":[156],"address":[157],"drawback:":[159],"while":[160],"maintaining":[161],"its":[162],"simplicity,":[163],"demonstrate":[165],"that":[166,178],"combined":[168],"scheme":[173],"may":[174],"yield":[175],"be,":[180],"practice,":[182],"efficient":[184],"obtained":[187],"by":[188],"state-of-the-art":[190],"methods.":[192],"Using":[193,207],"realistic":[195],"classification":[196],"example,":[197],"augmented":[199],"was":[201],"used":[202],"generate":[204],"large":[209],"chemical":[210],"set,":[212],"generalization":[214],"ability":[215],"then":[220],"compared":[221],"prediction":[224],"power":[225],"Laplacian-modified":[227],"naive":[228],"Bayesian":[229],"(LmNB),":[230],"Winnow":[231],"(WN),":[232],"SVM":[234]},"counts_by_year":[{"year":2020,"cited_by_count":2},{"year":2017,"cited_by_count":3},{"year":2015,"cited_by_count":3},{"year":2014,"cited_by_count":2},{"year":2012,"cited_by_count":1}],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2025-10-10T00:00:00"}
