{"id":"https://openalex.org/W4413942118","doi":"https://doi.org/10.1021/acs.jcim.5c00395","title":"All That Glitters Is Not Gold: Importance of Rigorous Evaluation of Proteochemometric Models","display_name":"All That Glitters Is Not Gold: Importance of Rigorous Evaluation of Proteochemometric Models","publication_year":2025,"publication_date":"2025-09-03","ids":{"openalex":"https://openalex.org/W4413942118","doi":"https://doi.org/10.1021/acs.jcim.5c00395","pmid":"https://pubmed.ncbi.nlm.nih.gov/40899589"},"language":"en","primary_location":{"id":"doi:10.1021/acs.jcim.5c00395","is_oa":true,"landing_page_url":"https://doi.org/10.1021/acs.jcim.5c00395","pdf_url":"https://pubs.acs.org/doi/pdf/10.1021/acs.jcim.5c00395?ref=article_openPDF","source":{"id":"https://openalex.org/S167262187","display_name":"Journal of Chemical Information and Modeling","issn_l":"1549-9596","issn":["1549-9596","1549-960X"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310320006","host_organization_name":"American Chemical Society","host_organization_lineage":["https://openalex.org/P4310320006"],"host_organization_lineage_names":["American Chemical Society"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Journal of Chemical Information and Modeling","raw_type":"journal-article"},"type":"article","indexed_in":["crossref","pubmed"],"open_access":{"is_oa":true,"oa_status":"hybrid","oa_url":"https://pubs.acs.org/doi/pdf/10.1021/acs.jcim.5c00395?ref=article_openPDF","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5061391376","display_name":"Polina Avdiunina","orcid":null},"institutions":[{"id":"https://openalex.org/I74973139","display_name":"Carnegie Mellon University","ror":"https://ror.org/05x2bcf33","country_code":"US","type":"education","lineage":["https://openalex.org/I74973139"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Polina Avdiunina","raw_affiliation_strings":["Carnegie Mellon University","Department of Chemistry, Mellon College of Science","Department of Chemistry, Mellon College of Science, Carnegie Mellon University, Pittsburgh, Pennsylvania 15213, United States"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Carnegie Mellon University","institution_ids":["https://openalex.org/I74973139"]},{"raw_affiliation_string":"Department of Chemistry, Mellon College of Science","institution_ids":[]},{"raw_affiliation_string":"Department of Chemistry, Mellon College of Science, Carnegie Mellon University, Pittsburgh, Pennsylvania 15213, United States","institution_ids":["https://openalex.org/I74973139"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5115984813","display_name":"Shamieraah Jamal","orcid":null},"institutions":[{"id":"https://openalex.org/I74973139","display_name":"Carnegie Mellon University","ror":"https://ror.org/05x2bcf33","country_code":"US","type":"education","lineage":["https://openalex.org/I74973139"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Shamieraah Jamal","raw_affiliation_strings":["Carnegie Mellon University","Computational Biology Department, School of Computer Science","Department of Chemistry, Mellon College of Science","Department of Chemistry, Mellon College of Science, Carnegie Mellon University, Pittsburgh, Pennsylvania 15213, United States","Computational Biology Department, School of Computer Science, Carnegie Mellon University, Pittsburgh, Pennsylvania 15213, United States"],"raw_orcid":"https://orcid.org/0009-0002-8548-4712","affiliations":[{"raw_affiliation_string":"Carnegie Mellon University","institution_ids":["https://openalex.org/I74973139"]},{"raw_affiliation_string":"Computational Biology Department, School of Computer Science","institution_ids":[]},{"raw_affiliation_string":"Department of Chemistry, Mellon College of Science","institution_ids":[]},{"raw_affiliation_string":"Department of Chemistry, Mellon College of Science, Carnegie Mellon University, Pittsburgh, Pennsylvania 15213, United States","institution_ids":["https://openalex.org/I74973139"]},{"raw_affiliation_string":"Computational Biology Department, School of Computer Science, Carnegie Mellon University, Pittsburgh, Pennsylvania 15213, United States","institution_ids":["https://openalex.org/I74973139"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5011287493","display_name":"Filipp Gusev","orcid":"https://orcid.org/0000-0002-1167-345X"},"institutions":[{"id":"https://openalex.org/I74973139","display_name":"Carnegie Mellon University","ror":"https://ror.org/05x2bcf33","country_code":"US","type":"education","lineage":["https://openalex.org/I74973139"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Filipp Gusev","raw_affiliation_strings":["Carnegie Mellon University","Computational Biology Department, School of Computer Science","Department of Chemistry, Mellon College of Science","Department of Chemistry, Mellon College of Science, Carnegie Mellon University, Pittsburgh, Pennsylvania 15213, United States","Computational Biology Department, School of Computer Science, Carnegie Mellon University, Pittsburgh, Pennsylvania 15213, United States"],"raw_orcid":"https://orcid.org/0000-0002-1167-345X","affiliations":[{"raw_affiliation_string":"Carnegie Mellon University","institution_ids":["https://openalex.org/I74973139"]},{"raw_affiliation_string":"Computational Biology Department, School of Computer Science","institution_ids":[]},{"raw_affiliation_string":"Department of Chemistry, Mellon College of Science","institution_ids":[]},{"raw_affiliation_string":"Department of Chemistry, Mellon College of Science, Carnegie Mellon University, Pittsburgh, Pennsylvania 15213, United States","institution_ids":["https://openalex.org/I74973139"]},{"raw_affiliation_string":"Computational Biology Department, School of Computer Science, Carnegie Mellon University, Pittsburgh, Pennsylvania 15213, United States","institution_ids":["https://openalex.org/I74973139"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5011932992","display_name":"Olexandr Isayev","orcid":"https://orcid.org/0000-0001-7581-8497"},"institutions":[{"id":"https://openalex.org/I74973139","display_name":"Carnegie Mellon University","ror":"https://ror.org/05x2bcf33","country_code":"US","type":"education","lineage":["https://openalex.org/I74973139"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Olexandr Isayev","raw_affiliation_strings":["Carnegie Mellon University","Computational Biology Department, School of Computer Science","Department of Chemistry, Mellon College of Science","Computational Biology Department, School of Computer Science, Carnegie Mellon University, Pittsburgh, Pennsylvania 15213, United States","Department of Chemistry, Mellon College of Science, Carnegie Mellon University, Pittsburgh, Pennsylvania 15213, United States"],"raw_orcid":"https://orcid.org/0000-0001-7581-8497","affiliations":[{"raw_affiliation_string":"Carnegie Mellon University","institution_ids":["https://openalex.org/I74973139"]},{"raw_affiliation_string":"Computational Biology Department, School of Computer Science","institution_ids":[]},{"raw_affiliation_string":"Department of Chemistry, Mellon College of Science","institution_ids":[]},{"raw_affiliation_string":"Computational Biology Department, School of Computer Science, Carnegie Mellon University, Pittsburgh, Pennsylvania 15213, United States","institution_ids":["https://openalex.org/I74973139"]},{"raw_affiliation_string":"Department of Chemistry, Mellon College of Science, Carnegie Mellon University, Pittsburgh, Pennsylvania 15213, United States","institution_ids":["https://openalex.org/I74973139"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5011932992"],"corresponding_institution_ids":["https://openalex.org/I74973139"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":true,"cited_by_count":0,"citation_normalized_percentile":{"value":0.19309773,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":"65","issue":"19","first_page":"10239","last_page":"10252"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10519","display_name":"Advanced Proteomics Techniques and Applications","score":0.9983000159263611,"subfield":{"id":"https://openalex.org/subfields/1607","display_name":"Spectroscopy"},"field":{"id":"https://openalex.org/fields/16","display_name":"Chemistry"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10519","display_name":"Advanced Proteomics Techniques and Applications","score":0.9983000159263611,"subfield":{"id":"https://openalex.org/subfields/1607","display_name":"Spectroscopy"},"field":{"id":"https://openalex.org/fields/16","display_name":"Chemistry"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10836","display_name":"Metabolomics and Mass Spectrometry Studies","score":0.9962000250816345,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},{"id":"https://openalex.org/T10211","display_name":"Computational Drug Discovery Methods","score":0.9907000064849854,"subfield":{"id":"https://openalex.org/subfields/1703","display_name":"Computational Theory and Mathematics"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/gold-standard","display_name":"Gold standard (test)","score":0.42918965220451355},{"id":"https://openalex.org/keywords/data-science","display_name":"Data science","score":0.3692806363105774},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.36014485359191895},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.15432894229888916},{"id":"https://openalex.org/keywords/statistics","display_name":"Statistics","score":0.07571133971214294}],"concepts":[{"id":"https://openalex.org/C40993552","wikidata":"https://www.wikidata.org/wiki/Q514654","display_name":"Gold standard (test)","level":2,"score":0.42918965220451355},{"id":"https://openalex.org/C2522767166","wikidata":"https://www.wikidata.org/wiki/Q2374463","display_name":"Data science","level":1,"score":0.3692806363105774},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.36014485359191895},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.15432894229888916},{"id":"https://openalex.org/C105795698","wikidata":"https://www.wikidata.org/wiki/Q12483","display_name":"Statistics","level":1,"score":0.07571133971214294}],"mesh":[{"descriptor_ui":"D000069550","descriptor_name":"Machine Learning","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D000069550","descriptor_name":"Machine Learning","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D000069550","descriptor_name":"Machine Learning","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D000077321","descriptor_name":"Deep Learning","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D000077321","descriptor_name":"Deep Learning","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D000077321","descriptor_name":"Deep Learning","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D008024","descriptor_name":"Ligands","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D008024","descriptor_name":"Ligands","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D008024","descriptor_name":"Ligands","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D011506","descriptor_name":"Proteins","qualifier_ui":"Q000378","qualifier_name":"metabolism","is_major_topic":true},{"descriptor_ui":"D011506","descriptor_name":"Proteins","qualifier_ui":"Q000378","qualifier_name":"metabolism","is_major_topic":true},{"descriptor_ui":"D011506","descriptor_name":"Proteins","qualifier_ui":"Q000378","qualifier_name":"metabolism","is_major_topic":true},{"descriptor_ui":"D011506","descriptor_name":"Proteins","qualifier_ui":"Q000737","qualifier_name":"chemistry","is_major_topic":true},{"descriptor_ui":"D011506","descriptor_name":"Proteins","qualifier_ui":"Q000737","qualifier_name":"chemistry","is_major_topic":true},{"descriptor_ui":"D011506","descriptor_name":"Proteins","qualifier_ui":"Q000737","qualifier_name":"chemistry","is_major_topic":true},{"descriptor_ui":"D055808","descriptor_name":"Drug Discovery","qualifier_ui":"Q000379","qualifier_name":"methods","is_major_topic":true},{"descriptor_ui":"D055808","descriptor_name":"Drug Discovery","qualifier_ui":"Q000379","qualifier_name":"methods","is_major_topic":true},{"descriptor_ui":"D055808","descriptor_name":"Drug Discovery","qualifier_ui":"Q000379","qualifier_name":"methods","is_major_topic":true}],"locations_count":3,"locations":[{"id":"doi:10.1021/acs.jcim.5c00395","is_oa":true,"landing_page_url":"https://doi.org/10.1021/acs.jcim.5c00395","pdf_url":"https://pubs.acs.org/doi/pdf/10.1021/acs.jcim.5c00395?ref=article_openPDF","source":{"id":"https://openalex.org/S167262187","display_name":"Journal of Chemical Information and Modeling","issn_l":"1549-9596","issn":["1549-9596","1549-960X"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310320006","host_organization_name":"American Chemical Society","host_organization_lineage":["https://openalex.org/P4310320006"],"host_organization_lineage_names":["American Chemical Society"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Journal of Chemical Information and Modeling","raw_type":"journal-article"},{"id":"pmid:40899589","is_oa":false,"landing_page_url":"https://pubmed.ncbi.nlm.nih.gov/40899589","pdf_url":null,"source":{"id":"https://openalex.org/S4306525036","display_name":"PubMed","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1299303238","host_organization_name":"National Institutes of Health","host_organization_lineage":["https://openalex.org/I1299303238"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Journal of chemical information and modeling","raw_type":null},{"id":"pmh:oai:europepmc.org:11337878","is_oa":true,"landing_page_url":"https://www.ncbi.nlm.nih.gov/pmc/articles/12529762","pdf_url":null,"source":{"id":"https://openalex.org/S4306400806","display_name":"Europe PMC (PubMed Central)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1303153112","host_organization_name":"European Bioinformatics Institute","host_organization_lineage":["https://openalex.org/I1303153112"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Text"}],"best_oa_location":{"id":"doi:10.1021/acs.jcim.5c00395","is_oa":true,"landing_page_url":"https://doi.org/10.1021/acs.jcim.5c00395","pdf_url":"https://pubs.acs.org/doi/pdf/10.1021/acs.jcim.5c00395?ref=article_openPDF","source":{"id":"https://openalex.org/S167262187","display_name":"Journal of Chemical Information and Modeling","issn_l":"1549-9596","issn":["1549-9596","1549-960X"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310320006","host_organization_name":"American Chemical Society","host_organization_lineage":["https://openalex.org/P4310320006"],"host_organization_lineage_names":["American Chemical Society"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Journal of Chemical Information and Modeling","raw_type":"journal-article"},"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G4920794147","display_name":null,"funder_award_id":"CHE-2154447","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"}],"funders":[{"id":"https://openalex.org/F4320306076","display_name":"National Science Foundation","ror":"https://ror.org/021nxhr62"}],"has_content":{"pdf":true,"grobid_xml":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4413942118.pdf","grobid_xml":"https://content.openalex.org/works/W4413942118.grobid-xml"},"referenced_works_count":59,"referenced_works":["https://openalex.org/W1982131304","https://openalex.org/W2024213882","https://openalex.org/W2035585923","https://openalex.org/W2051627306","https://openalex.org/W2057069496","https://openalex.org/W2064264167","https://openalex.org/W2074231493","https://openalex.org/W2078047353","https://openalex.org/W2086286404","https://openalex.org/W2088970363","https://openalex.org/W2102245393","https://openalex.org/W2115888213","https://openalex.org/W2148512505","https://openalex.org/W2151338484","https://openalex.org/W2152630148","https://openalex.org/W2214665483","https://openalex.org/W2536259549","https://openalex.org/W2600971009","https://openalex.org/W2608559058","https://openalex.org/W2620843825","https://openalex.org/W2782718811","https://openalex.org/W2785947426","https://openalex.org/W2902812092","https://openalex.org/W2949676527","https://openalex.org/W2969325194","https://openalex.org/W2980234582","https://openalex.org/W2992752586","https://openalex.org/W2995998725","https://openalex.org/W2997958114","https://openalex.org/W3005364306","https://openalex.org/W3008726875","https://openalex.org/W3014805132","https://openalex.org/W3030018498","https://openalex.org/W3036230820","https://openalex.org/W3091671345","https://openalex.org/W3093634533","https://openalex.org/W3096561213","https://openalex.org/W3173174268","https://openalex.org/W3177500196","https://openalex.org/W3177828909","https://openalex.org/W4200139236","https://openalex.org/W4281381643","https://openalex.org/W4283013935","https://openalex.org/W4321214126","https://openalex.org/W4327550249","https://openalex.org/W4380083000","https://openalex.org/W4385520996","https://openalex.org/W4386044113","https://openalex.org/W4386453564","https://openalex.org/W4388221506","https://openalex.org/W4388627063","https://openalex.org/W4389611037","https://openalex.org/W4389941191","https://openalex.org/W4391542588","https://openalex.org/W4392093325","https://openalex.org/W4396610000","https://openalex.org/W4396721167","https://openalex.org/W4399975048","https://openalex.org/W4404458588"],"related_works":["https://openalex.org/W4391375266","https://openalex.org/W2899084033","https://openalex.org/W2748952813","https://openalex.org/W2390279801","https://openalex.org/W4391913857","https://openalex.org/W2358668433","https://openalex.org/W4396701345","https://openalex.org/W2376932109","https://openalex.org/W2001405890","https://openalex.org/W832193342"],"abstract_inverted_index":{"Proteochemometric":[0],"models":[1,160],"(PCMs)":[2],"are":[3,100],"used":[4],"in":[5,87,111],"computational":[6],"drug":[7],"discovery":[8],"to":[9,30,142,155,161],"employ":[10],"both":[11],"protein":[12,138],"and":[13,24,74,84,97,122,164],"ligand":[14],"representations":[15],"jointly":[16],"for":[17,38,79,148],"bioactivity":[18,55],"prediction.":[19],"While":[20],"machine":[21],"learning":[22,26],"(ML)":[23],"deep":[25],"(DL)":[27],"have":[28,44],"come":[29],"dominate":[31],"PCMs,":[32],"often":[33],"serving":[34],"as":[35,57],"a":[36,58],"basis":[37],"scoring":[39],"functions,":[40],"rigorous":[41],"evaluation":[42,153],"standards":[43,154],"not":[45],"always":[46],"been":[47],"consistently":[48,135],"applied.":[49],"In":[50],"this":[51],"study,":[52],"using":[53],"kinase-ligand":[54],"prediction":[56],"model":[59,89],"system,":[60],"we":[61],"highlight":[62],"the":[63,101,109,112,149,157],"critical":[64,103],"roles":[65],"of":[66,115,151,159],"data":[67,76,82,95,163],"set":[68],"curation,":[69],"permutation":[70,133],"testing,":[71],"class":[72,98],"imbalances,":[73],"various":[75,119],"splitting":[77,96],"strategies":[78],"mitigating":[80],"plausible":[81],"leakage":[83],"embedding":[85],"quality":[86],"determining":[88],"performance.":[90],"Our":[91],"findings":[92],"indicate":[93],"that":[94,137],"imbalances":[99],"most":[102],"factors":[104],"affecting":[105],"PCM":[106,143],"performance,":[107],"emphasizing":[108],"challenges":[110],"generalizing":[113],"ability":[114],"ML/DL-PCMs.":[116],"We":[117],"evaluated":[118],"protein-ligand":[120],"descriptors":[121],"embeddings,":[123],"including":[124],"those":[125],"augmented":[126],"with":[127],"multiple":[128],"sequence":[129],"alignment":[130],"information.":[131],"However,":[132],"testing":[134],"demonstrated":[136],"embeddings":[139],"contributed":[140],"minimally":[141],"efficacy.":[144],"This":[145],"study":[146],"advocates":[147],"adoption":[150],"stringent":[152],"enhance":[156],"generalizability":[158],"out-of-distribution":[162],"improve":[165],"benchmarking":[166],"practices.":[167]},"counts_by_year":[],"updated_date":"2026-05-07T13:39:58.223016","created_date":"2025-10-10T00:00:00"}
