{"id":"https://openalex.org/W4386847912","doi":"https://doi.org/10.1186/s13321-023-00755-3","title":"Exploring the ability of machine learning-based virtual screening models to identify the functional groups responsible for binding","display_name":"Exploring the ability of machine learning-based virtual screening models to identify the functional groups responsible for binding","publication_year":2023,"publication_date":"2023-09-19","ids":{"openalex":"https://openalex.org/W4386847912","doi":"https://doi.org/10.1186/s13321-023-00755-3","pmid":"https://pubmed.ncbi.nlm.nih.gov/37726844"},"language":"en","primary_location":{"id":"doi:10.1186/s13321-023-00755-3","is_oa":true,"landing_page_url":"https://doi.org/10.1186/s13321-023-00755-3","pdf_url":"https://jcheminf.biomedcentral.com/counter/pdf/10.1186/s13321-023-00755-3","source":{"id":"https://openalex.org/S180838163","display_name":"Journal of Cheminformatics","issn_l":"1758-2946","issn":["1758-2946"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310320256","host_organization_name":"BioMed Central","host_organization_lineage":["https://openalex.org/P4310320256","https://openalex.org/P4310319965"],"host_organization_lineage_names":["BioMed Central","Springer Nature"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Journal of Cheminformatics","raw_type":"journal-article"},"type":"article","indexed_in":["crossref","doaj","pubmed"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://jcheminf.biomedcentral.com/counter/pdf/10.1186/s13321-023-00755-3","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5040409076","display_name":"Thomas E. Hadfield","orcid":"https://orcid.org/0000-0001-5397-6320"},"institutions":[{"id":"https://openalex.org/I40120149","display_name":"University of Oxford","ror":"https://ror.org/052gg0110","country_code":"GB","type":"education","lineage":["https://openalex.org/I40120149"]},{"id":"https://openalex.org/I2802123492","display_name":"Oxford Research Group","ror":"https://ror.org/00z4w4f29","country_code":"GB","type":"nonprofit","lineage":["https://openalex.org/I2802123492"]}],"countries":["GB"],"is_corresponding":true,"raw_author_name":"Thomas E. Hadfield","raw_affiliation_strings":["Oxford Protein Informatics Group, Department of Statistics, University of Oxford, Oxford, UK"],"affiliations":[{"raw_affiliation_string":"Oxford Protein Informatics Group, Department of Statistics, University of Oxford, Oxford, UK","institution_ids":["https://openalex.org/I2802123492","https://openalex.org/I40120149"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5064355603","display_name":"Jack Scantlebury","orcid":"https://orcid.org/0000-0001-9877-0107"},"institutions":[{"id":"https://openalex.org/I2802123492","display_name":"Oxford Research Group","ror":"https://ror.org/00z4w4f29","country_code":"GB","type":"nonprofit","lineage":["https://openalex.org/I2802123492"]},{"id":"https://openalex.org/I40120149","display_name":"University of Oxford","ror":"https://ror.org/052gg0110","country_code":"GB","type":"education","lineage":["https://openalex.org/I40120149"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Jack Scantlebury","raw_affiliation_strings":["Oxford Protein Informatics Group, Department of Statistics, University of Oxford, Oxford, UK"],"affiliations":[{"raw_affiliation_string":"Oxford Protein Informatics Group, Department of Statistics, University of Oxford, Oxford, UK","institution_ids":["https://openalex.org/I2802123492","https://openalex.org/I40120149"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5015572211","display_name":"Charlotte M. Deane","orcid":"https://orcid.org/0000-0003-1388-2252"},"institutions":[{"id":"https://openalex.org/I40120149","display_name":"University of Oxford","ror":"https://ror.org/052gg0110","country_code":"GB","type":"education","lineage":["https://openalex.org/I40120149"]},{"id":"https://openalex.org/I4210146410","display_name":"Science Oxford","ror":"https://ror.org/04j8yhy50","country_code":"GB","type":"nonprofit","lineage":["https://openalex.org/I4210146410"]},{"id":"https://openalex.org/I2802123492","display_name":"Oxford Research Group","ror":"https://ror.org/00z4w4f29","country_code":"GB","type":"nonprofit","lineage":["https://openalex.org/I2802123492"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Charlotte M. Deane","raw_affiliation_strings":["Oxford Protein Informatics Group, Department of Statistics, University of Oxford, Oxford, UK. deane@stats.ox.ac.uk","Oxford Protein Informatics Group, Department of Statistics, University of Oxford, Oxford, UK"],"affiliations":[{"raw_affiliation_string":"Oxford Protein Informatics Group, Department of Statistics, University of Oxford, Oxford, UK. deane@stats.ox.ac.uk","institution_ids":["https://openalex.org/I40120149","https://openalex.org/I2802123492","https://openalex.org/I4210146410"]},{"raw_affiliation_string":"Oxford Protein Informatics Group, Department of Statistics, University of Oxford, Oxford, UK","institution_ids":["https://openalex.org/I2802123492","https://openalex.org/I40120149"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5040409076"],"corresponding_institution_ids":["https://openalex.org/I2802123492","https://openalex.org/I40120149"],"apc_list":{"value":1290,"currency":"GBP","value_usd":1582},"apc_paid":{"value":1290,"currency":"GBP","value_usd":1582},"fwci":1.5568,"has_fulltext":true,"cited_by_count":8,"citation_normalized_percentile":{"value":0.8582097,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":98},"biblio":{"volume":"15","issue":"1","first_page":"84","last_page":"84"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10211","display_name":"Computational Drug Discovery Methods","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1703","display_name":"Computational Theory and Mathematics"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10211","display_name":"Computational Drug Discovery Methods","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1703","display_name":"Computational Theory and Mathematics"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10044","display_name":"Protein Structure and Dynamics","score":0.9919000267982483,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},{"id":"https://openalex.org/T10887","display_name":"Bioinformatics and Genomic Networks","score":0.9908000230789185,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/virtual-screening","display_name":"Virtual screening","score":0.7553101778030396},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6638585329055786},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.5309825539588928},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.4450334310531616},{"id":"https://openalex.org/keywords/data-science","display_name":"Data science","score":0.35800713300704956},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.32694011926651},{"id":"https://openalex.org/keywords/drug-discovery","display_name":"Drug discovery","score":0.2969138026237488},{"id":"https://openalex.org/keywords/bioinformatics","display_name":"Bioinformatics","score":0.24848204851150513},{"id":"https://openalex.org/keywords/biology","display_name":"Biology","score":0.09152811765670776}],"concepts":[{"id":"https://openalex.org/C103697762","wikidata":"https://www.wikidata.org/wiki/Q4112105","display_name":"Virtual screening","level":3,"score":0.7553101778030396},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6638585329055786},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.5309825539588928},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4450334310531616},{"id":"https://openalex.org/C2522767166","wikidata":"https://www.wikidata.org/wiki/Q2374463","display_name":"Data science","level":1,"score":0.35800713300704956},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.32694011926651},{"id":"https://openalex.org/C74187038","wikidata":"https://www.wikidata.org/wiki/Q1418791","display_name":"Drug discovery","level":2,"score":0.2969138026237488},{"id":"https://openalex.org/C60644358","wikidata":"https://www.wikidata.org/wiki/Q128570","display_name":"Bioinformatics","level":1,"score":0.24848204851150513},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.09152811765670776}],"mesh":[],"locations_count":4,"locations":[{"id":"doi:10.1186/s13321-023-00755-3","is_oa":true,"landing_page_url":"https://doi.org/10.1186/s13321-023-00755-3","pdf_url":"https://jcheminf.biomedcentral.com/counter/pdf/10.1186/s13321-023-00755-3","source":{"id":"https://openalex.org/S180838163","display_name":"Journal of Cheminformatics","issn_l":"1758-2946","issn":["1758-2946"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310320256","host_organization_name":"BioMed Central","host_organization_lineage":["https://openalex.org/P4310320256","https://openalex.org/P4310319965"],"host_organization_lineage_names":["BioMed Central","Springer Nature"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Journal of Cheminformatics","raw_type":"journal-article"},{"id":"pmid:37726844","is_oa":false,"landing_page_url":"https://pubmed.ncbi.nlm.nih.gov/37726844","pdf_url":null,"source":{"id":"https://openalex.org/S4306525036","display_name":"PubMed","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1299303238","host_organization_name":"National Institutes of Health","host_organization_lineage":["https://openalex.org/I1299303238"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Journal of cheminformatics","raw_type":null},{"id":"pmh:oai:pubmedcentral.nih.gov:10509074","is_oa":true,"landing_page_url":"https://www.ncbi.nlm.nih.gov/pmc/articles/10509074","pdf_url":"https://pmc.ncbi.nlm.nih.gov/articles/PMC10509074/pdf/13321_2023_Article_755.pdf","source":{"id":"https://openalex.org/S2764455111","display_name":"PubMed Central","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1299303238","host_organization_name":"National Institutes of Health","host_organization_lineage":["https://openalex.org/I1299303238"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"J Cheminform","raw_type":"Text"},{"id":"pmh:oai:doaj.org/article:35e3cbb9c86245468e7c48f5051c1d0f","is_oa":true,"landing_page_url":"https://doaj.org/article/35e3cbb9c86245468e7c48f5051c1d0f","pdf_url":null,"source":{"id":"https://openalex.org/S112646816","display_name":"SHILAP Revista de lepidopterolog\u00eda","issn_l":"0300-5267","issn":["0300-5267","2340-4078"],"is_oa":true,"is_in_doaj":true,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Journal of Cheminformatics, Vol 15, Iss 1, Pp 1-15 (2023)","raw_type":"article"}],"best_oa_location":{"id":"doi:10.1186/s13321-023-00755-3","is_oa":true,"landing_page_url":"https://doi.org/10.1186/s13321-023-00755-3","pdf_url":"https://jcheminf.biomedcentral.com/counter/pdf/10.1186/s13321-023-00755-3","source":{"id":"https://openalex.org/S180838163","display_name":"Journal of Cheminformatics","issn_l":"1758-2946","issn":["1758-2946"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310320256","host_organization_name":"BioMed Central","host_organization_lineage":["https://openalex.org/P4310320256","https://openalex.org/P4310319965"],"host_organization_lineage_names":["BioMed Central","Springer Nature"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Journal of Cheminformatics","raw_type":"journal-article"},"sustainable_development_goals":[{"score":0.4399999976158142,"id":"https://metadata.un.org/sdg/15","display_name":"Life in Land"}],"awards":[{"id":"https://openalex.org/G1571791257","display_name":"EPSRC and MRC Centre for Doctoral Training in Systems Approaches to Biomedical Science","funder_award_id":"EP/L016044/1","funder_id":"https://openalex.org/F4320334627","funder_display_name":"Engineering and Physical Sciences Research Council"},{"id":"https://openalex.org/G1934935867","display_name":null,"funder_award_id":"Engineering and Physical Sciences R","funder_id":"https://openalex.org/F4320334627","funder_display_name":"Engineering and Physical Sciences Research Council"},{"id":"https://openalex.org/G5223408957","display_name":"Automating compound detection and prioritisation for fragment-based drug design","funder_award_id":"BB/S507611/1","funder_id":"https://openalex.org/F4320334629","funder_display_name":"Biotechnology and Biological Sciences Research Council"},{"id":"https://openalex.org/G6653872853","display_name":null,"funder_award_id":"EP/L016","funder_id":"https://openalex.org/F4320334627","funder_display_name":"Engineering and Physical Sciences Research Council"}],"funders":[{"id":"https://openalex.org/F4320307779","display_name":"Roche","ror":"https://ror.org/00by1q217"},{"id":"https://openalex.org/F4320309432","display_name":"F. Hoffmann-La Roche","ror":"https://ror.org/00by1q217"},{"id":"https://openalex.org/F4320313480","display_name":"LifeArc","ror":"https://ror.org/01dqb0q37"},{"id":"https://openalex.org/F4320334627","display_name":"Engineering and Physical Sciences Research Council","ror":"https://ror.org/0439y7842"},{"id":"https://openalex.org/F4320334629","display_name":"Biotechnology and Biological Sciences Research Council","ror":"https://ror.org/00cwqg982"},{"id":"https://openalex.org/F4320337846","display_name":"UCB Pharma","ror":"https://ror.org/01n029866"}],"has_content":{"pdf":true,"grobid_xml":false},"content_urls":{"pdf":"https://content.openalex.org/works/W4386847912.pdf"},"referenced_works_count":40,"referenced_works":["https://openalex.org/W1757990252","https://openalex.org/W1968319881","https://openalex.org/W1988037271","https://openalex.org/W1993285168","https://openalex.org/W2052030759","https://openalex.org/W2128983966","https://openalex.org/W2134967712","https://openalex.org/W2148512505","https://openalex.org/W2526925775","https://openalex.org/W2583907533","https://openalex.org/W2587598315","https://openalex.org/W2608559058","https://openalex.org/W2610148085","https://openalex.org/W2766447205","https://openalex.org/W2794434752","https://openalex.org/W2889677957","https://openalex.org/W2894566366","https://openalex.org/W2913668833","https://openalex.org/W2918239264","https://openalex.org/W2946617578","https://openalex.org/W2969325194","https://openalex.org/W2969980075","https://openalex.org/W3009999522","https://openalex.org/W3011847211","https://openalex.org/W3015572666","https://openalex.org/W3038151251","https://openalex.org/W3045085645","https://openalex.org/W3100704554","https://openalex.org/W3101155908","https://openalex.org/W3104705366","https://openalex.org/W3133900975","https://openalex.org/W3177828909","https://openalex.org/W3186179742","https://openalex.org/W3209764902","https://openalex.org/W3212854871","https://openalex.org/W4220933119","https://openalex.org/W4223916004","https://openalex.org/W4225266560","https://openalex.org/W4281381643","https://openalex.org/W4376131268"],"related_works":["https://openalex.org/W2961085424","https://openalex.org/W4306674287","https://openalex.org/W3046775127","https://openalex.org/W3107602296","https://openalex.org/W4394896187","https://openalex.org/W3170094116","https://openalex.org/W4386462264","https://openalex.org/W4364306694","https://openalex.org/W4312192474","https://openalex.org/W4283697347"],"abstract_inverted_index":{"Many":[0],"recently":[1,167],"proposed":[2,168],"structure-based":[3],"virtual":[4,64,171,214,247],"screening":[5,65,172,215,248],"models":[6,66,224],"appear":[7],"to":[8,11,60,69,125,141,154,197,225,241],"be":[9],"able":[10,68],"accurately":[12],"distinguish":[13],"high":[14],"affinity":[15],"binders":[16],"from":[17],"non-binders.":[18],"However,":[19],"several":[20],"recent":[21],"studies":[22],"have":[23,233],"shown":[24],"that":[25,165,191,204],"they":[26],"often":[27],"do":[28],"so":[29],"by":[30,113,132],"exploiting":[31],"ligand-specific":[32,205],"biases":[33],"in":[34,43,81,108,211],"the":[35,44,58,71,79,83,109,122,126,144,155,176,219,227,243],"dataset,":[36],"rather":[37],"than":[38,185],"identifying":[39],"favourable":[40],"intermolecular":[41],"interactions":[42],"input":[45],"protein-ligand":[46,96,128],"complex.":[47],"In":[48,200],"this":[49],"work":[50],"we":[51,98,163,202],"propose":[52,99],"a":[53,91,100,114,133,166,186],"novel":[54],"approach":[55],"for":[56,75,102],"assessing":[57],"extent":[59],"which":[61],"machine":[62],"learning-based":[63,170],"are":[67],"identify":[70,226],"functional":[72,179,230],"groups":[73,180],"responsible":[74],"binding.":[76],"To":[77],"sidestep":[78],"difficulty":[80],"establishing":[82],"ground":[84,145],"truth":[85,146],"importance":[86,147],"of":[87,90,95,119,148,221,245],"each":[88,149],"atom":[89,150],"large":[92],"scale":[93],"set":[94],"complexes,":[97],"protocol":[101],"generating":[103],"synthetic":[104,127,236],"data.":[105],"Each":[106],"ligand":[107],"dataset":[110],"is":[111,130,251],"surrounded":[112],"randomly":[115],"sampled":[116],"point":[117],"cloud":[118],"pharmacophores,":[120],"and":[121,151],"label":[123],"assigned":[124],"complex":[129],"determined":[131],"3-dimensional":[134],"deterministic":[135],"binding":[136],"rule.":[137],"This":[138],"allows":[139],"us":[140],"precisely":[142],"quantify":[143],"compare":[152],"it":[153,192],"model":[156],"generated":[157,161],"attributions.":[158],"Using":[159],"our":[160,235],"datasets,":[162,216],"demonstrate":[164],"deep":[169],"model,":[173],"PointVS,":[174],"identified":[175],"most":[177,228],"important":[178,229],"with":[181],"39%":[182],"more":[183,195],"efficiency":[184],"fingerprint-based":[187],"random":[188],"forest,":[189],"suggesting":[190],"would":[193],"generalise":[194],"effectively":[196],"new":[198,246],"examples.":[199],"addition,":[201],"found":[203],"biases,":[206],"such":[207],"as":[208],"those":[209],"present":[210],"widely":[212],"used":[213],"substantially":[217],"impaired":[218],"ability":[220],"all":[222],"ML":[223],"groups.":[231],"We":[232],"made":[234],"data":[237],"generation":[238],"framework":[239],"available":[240,252],"facilitate":[242],"benchmarking":[244],"models.":[249],"Code":[250],"at":[253],"https://github.com/tomhadfield95/synthVS":[254],".":[255]},"counts_by_year":[{"year":2025,"cited_by_count":1},{"year":2024,"cited_by_count":6},{"year":2023,"cited_by_count":1}],"updated_date":"2026-04-21T08:09:41.155169","created_date":"2025-10-10T00:00:00"}
