{"id":"https://openalex.org/W4313597804","doi":"https://doi.org/10.1186/s13321-022-00672-x","title":"Papyrus: a large-scale curated dataset aimed at bioactivity predictions","display_name":"Papyrus: a large-scale curated dataset aimed at bioactivity predictions","publication_year":2023,"publication_date":"2023-01-06","ids":{"openalex":"https://openalex.org/W4313597804","doi":"https://doi.org/10.1186/s13321-022-00672-x","pmid":"https://pubmed.ncbi.nlm.nih.gov/36609528"},"language":"en","primary_location":{"id":"doi:10.1186/s13321-022-00672-x","is_oa":true,"landing_page_url":"https://doi.org/10.1186/s13321-022-00672-x","pdf_url":"https://jcheminf.biomedcentral.com/counter/pdf/10.1186/s13321-022-00672-x","source":{"id":"https://openalex.org/S180838163","display_name":"Journal of Cheminformatics","issn_l":"1758-2946","issn":["1758-2946"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310320256","host_organization_name":"BioMed Central","host_organization_lineage":["https://openalex.org/P4310320256","https://openalex.org/P4310319965"],"host_organization_lineage_names":["BioMed Central","Springer Nature"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Journal of Cheminformatics","raw_type":"journal-article"},"type":"article","indexed_in":["crossref","doaj","pubmed"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://jcheminf.biomedcentral.com/counter/pdf/10.1186/s13321-022-00672-x","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5030705778","display_name":"Olivier J. M. B\u00e9quignon","orcid":"https://orcid.org/0000-0002-7554-9220"},"institutions":[{"id":"https://openalex.org/I121797337","display_name":"Leiden University","ror":"https://ror.org/027bh9e22","country_code":"NL","type":"education","lineage":["https://openalex.org/I121797337"]},{"id":"https://openalex.org/I4210133171","display_name":"Centre for Human Drug Research","ror":"https://ror.org/044hshx49","country_code":"NL","type":"facility","lineage":["https://openalex.org/I4210133171"]}],"countries":["NL"],"is_corresponding":false,"raw_author_name":"O. J. M. B\u00e9quignon","raw_affiliation_strings":["Division of Drug Discovery and Safety, Leiden Academic Centre for Drug Research, Leiden University, Leiden, The Netherlands"],"affiliations":[{"raw_affiliation_string":"Division of Drug Discovery and Safety, Leiden Academic Centre for Drug Research, Leiden University, Leiden, The Netherlands","institution_ids":["https://openalex.org/I4210133171","https://openalex.org/I121797337"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5050432136","display_name":"Brandon J. Bongers","orcid":"https://orcid.org/0000-0003-1322-2369"},"institutions":[{"id":"https://openalex.org/I121797337","display_name":"Leiden University","ror":"https://ror.org/027bh9e22","country_code":"NL","type":"education","lineage":["https://openalex.org/I121797337"]},{"id":"https://openalex.org/I4210133171","display_name":"Centre for Human Drug Research","ror":"https://ror.org/044hshx49","country_code":"NL","type":"facility","lineage":["https://openalex.org/I4210133171"]}],"countries":["NL"],"is_corresponding":false,"raw_author_name":"B. J. Bongers","raw_affiliation_strings":["Division of Drug Discovery and Safety, Leiden Academic Centre for Drug Research, Leiden University, Leiden, The Netherlands"],"affiliations":[{"raw_affiliation_string":"Division of Drug Discovery and Safety, Leiden Academic Centre for Drug Research, Leiden University, Leiden, The Netherlands","institution_ids":["https://openalex.org/I4210133171","https://openalex.org/I121797337"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5063810062","display_name":"Willem Jespers","orcid":"https://orcid.org/0000-0002-4951-9220"},"institutions":[{"id":"https://openalex.org/I121797337","display_name":"Leiden University","ror":"https://ror.org/027bh9e22","country_code":"NL","type":"education","lineage":["https://openalex.org/I121797337"]},{"id":"https://openalex.org/I4210133171","display_name":"Centre for Human Drug Research","ror":"https://ror.org/044hshx49","country_code":"NL","type":"facility","lineage":["https://openalex.org/I4210133171"]}],"countries":["NL"],"is_corresponding":false,"raw_author_name":"W. Jespers","raw_affiliation_strings":["Division of Drug Discovery and Safety, Leiden Academic Centre for Drug Research, Leiden University, Leiden, The Netherlands"],"affiliations":[{"raw_affiliation_string":"Division of Drug Discovery and Safety, Leiden Academic Centre for Drug Research, Leiden University, Leiden, The Netherlands","institution_ids":["https://openalex.org/I4210133171","https://openalex.org/I121797337"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5065525958","display_name":"Adriaan P. IJzerman","orcid":"https://orcid.org/0000-0002-1182-2259"},"institutions":[{"id":"https://openalex.org/I121797337","display_name":"Leiden University","ror":"https://ror.org/027bh9e22","country_code":"NL","type":"education","lineage":["https://openalex.org/I121797337"]},{"id":"https://openalex.org/I4210133171","display_name":"Centre for Human Drug Research","ror":"https://ror.org/044hshx49","country_code":"NL","type":"facility","lineage":["https://openalex.org/I4210133171"]}],"countries":["NL"],"is_corresponding":false,"raw_author_name":"A. P. IJzerman","raw_affiliation_strings":["Division of Drug Discovery and Safety, Leiden Academic Centre for Drug Research, Leiden University, Leiden, The Netherlands"],"affiliations":[{"raw_affiliation_string":"Division of Drug Discovery and Safety, Leiden Academic Centre for Drug Research, Leiden University, Leiden, The Netherlands","institution_ids":["https://openalex.org/I4210133171","https://openalex.org/I121797337"]}]},{"author_position":"middle","author":{"id":null,"display_name":"B. van der Water","orcid":null},"institutions":[{"id":"https://openalex.org/I121797337","display_name":"Leiden University","ror":"https://ror.org/027bh9e22","country_code":"NL","type":"education","lineage":["https://openalex.org/I121797337"]},{"id":"https://openalex.org/I4210133171","display_name":"Centre for Human Drug Research","ror":"https://ror.org/044hshx49","country_code":"NL","type":"facility","lineage":["https://openalex.org/I4210133171"]}],"countries":["NL"],"is_corresponding":false,"raw_author_name":"B. van der Water","raw_affiliation_strings":["Division of Drug Discovery and Safety, Leiden Academic Centre for Drug Research, Leiden University, Leiden, The Netherlands"],"affiliations":[{"raw_affiliation_string":"Division of Drug Discovery and Safety, Leiden Academic Centre for Drug Research, Leiden University, Leiden, The Netherlands","institution_ids":["https://openalex.org/I4210133171","https://openalex.org/I121797337"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5088996741","display_name":"Gerard J. P. van Westen","orcid":"https://orcid.org/0000-0003-0717-1817"},"institutions":[{"id":"https://openalex.org/I121797337","display_name":"Leiden University","ror":"https://ror.org/027bh9e22","country_code":"NL","type":"education","lineage":["https://openalex.org/I121797337"]},{"id":"https://openalex.org/I4210133171","display_name":"Centre for Human Drug Research","ror":"https://ror.org/044hshx49","country_code":"NL","type":"facility","lineage":["https://openalex.org/I4210133171"]}],"countries":["NL"],"is_corresponding":true,"raw_author_name":"G. J. P. van Westen","raw_affiliation_strings":["Division of Drug Discovery and Safety, Leiden Academic Centre for Drug Research, Leiden University, Leiden, The Netherlands. gerard@lacdr.leidenuniv.nl","Division of Drug Discovery and Safety, Leiden Academic Centre for Drug Research, Leiden University, Leiden, The Netherlands"],"affiliations":[{"raw_affiliation_string":"Division of Drug Discovery and Safety, Leiden Academic Centre for Drug Research, Leiden University, Leiden, The Netherlands. gerard@lacdr.leidenuniv.nl","institution_ids":["https://openalex.org/I4210133171"]},{"raw_affiliation_string":"Division of Drug Discovery and Safety, Leiden Academic Centre for Drug Research, Leiden University, Leiden, The Netherlands","institution_ids":["https://openalex.org/I4210133171","https://openalex.org/I121797337"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5088996741"],"corresponding_institution_ids":["https://openalex.org/I121797337","https://openalex.org/I4210133171"],"apc_list":{"value":1290,"currency":"GBP","value_usd":1582},"apc_paid":{"value":1290,"currency":"GBP","value_usd":1582},"fwci":14.5946,"has_fulltext":true,"cited_by_count":75,"citation_normalized_percentile":{"value":0.99236616,"is_in_top_1_percent":true,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":99,"max":100},"biblio":{"volume":"15","issue":"1","first_page":"3","last_page":"3"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10211","display_name":"Computational Drug Discovery Methods","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1703","display_name":"Computational Theory and Mathematics"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10211","display_name":"Computational Drug Discovery Methods","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1703","display_name":"Computational Theory and Mathematics"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10252","display_name":"Microbial Natural Products and Biosynthesis","score":0.9939000010490417,"subfield":{"id":"https://openalex.org/subfields/2736","display_name":"Pharmacology"},"field":{"id":"https://openalex.org/fields/27","display_name":"Medicine"},"domain":{"id":"https://openalex.org/domains/4","display_name":"Health Sciences"}},{"id":"https://openalex.org/T11882","display_name":"Plant biochemistry and biosynthesis","score":0.9884999990463257,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/chembl","display_name":"chEMBL","score":0.8753705620765686},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8133358955383301},{"id":"https://openalex.org/keywords/benchmark","display_name":"Benchmark (surveying)","score":0.6901835799217224},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.5844265818595886},{"id":"https://openalex.org/keywords/data-cleansing","display_name":"Data cleansing","score":0.5595293045043945},{"id":"https://openalex.org/keywords/set","display_name":"Set (abstract data type)","score":0.557274580001831},{"id":"https://openalex.org/keywords/data-set","display_name":"Data set","score":0.5253956913948059},{"id":"https://openalex.org/keywords/data-science","display_name":"Data science","score":0.4916124939918518},{"id":"https://openalex.org/keywords/data-quality","display_name":"Data quality","score":0.47831085324287415},{"id":"https://openalex.org/keywords/quality","display_name":"Quality (philosophy)","score":0.4601458013057709},{"id":"https://openalex.org/keywords/scale","display_name":"Scale (ratio)","score":0.4521031081676483},{"id":"https://openalex.org/keywords/data-collection","display_name":"Data collection","score":0.42690807580947876},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.364656925201416},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.25465989112854004},{"id":"https://openalex.org/keywords/drug-discovery","display_name":"Drug discovery","score":0.19394809007644653},{"id":"https://openalex.org/keywords/bioinformatics","display_name":"Bioinformatics","score":0.16733583807945251}],"concepts":[{"id":"https://openalex.org/C63222358","wikidata":"https://www.wikidata.org/wiki/Q6120337","display_name":"chEMBL","level":3,"score":0.8753705620765686},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8133358955383301},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.6901835799217224},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.5844265818595886},{"id":"https://openalex.org/C42199009","wikidata":"https://www.wikidata.org/wiki/Q1172378","display_name":"Data cleansing","level":4,"score":0.5595293045043945},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.557274580001831},{"id":"https://openalex.org/C58489278","wikidata":"https://www.wikidata.org/wiki/Q1172284","display_name":"Data set","level":2,"score":0.5253956913948059},{"id":"https://openalex.org/C2522767166","wikidata":"https://www.wikidata.org/wiki/Q2374463","display_name":"Data science","level":1,"score":0.4916124939918518},{"id":"https://openalex.org/C24756922","wikidata":"https://www.wikidata.org/wiki/Q1757694","display_name":"Data quality","level":3,"score":0.47831085324287415},{"id":"https://openalex.org/C2779530757","wikidata":"https://www.wikidata.org/wiki/Q1207505","display_name":"Quality (philosophy)","level":2,"score":0.4601458013057709},{"id":"https://openalex.org/C2778755073","wikidata":"https://www.wikidata.org/wiki/Q10858537","display_name":"Scale (ratio)","level":2,"score":0.4521031081676483},{"id":"https://openalex.org/C133462117","wikidata":"https://www.wikidata.org/wiki/Q4929239","display_name":"Data collection","level":2,"score":0.42690807580947876},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.364656925201416},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.25465989112854004},{"id":"https://openalex.org/C74187038","wikidata":"https://www.wikidata.org/wiki/Q1418791","display_name":"Drug discovery","level":2,"score":0.19394809007644653},{"id":"https://openalex.org/C60644358","wikidata":"https://www.wikidata.org/wiki/Q128570","display_name":"Bioinformatics","level":1,"score":0.16733583807945251},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0},{"id":"https://openalex.org/C13280743","wikidata":"https://www.wikidata.org/wiki/Q131089","display_name":"Geodesy","level":1,"score":0.0},{"id":"https://openalex.org/C205649164","wikidata":"https://www.wikidata.org/wiki/Q1071","display_name":"Geography","level":0,"score":0.0},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C176217482","wikidata":"https://www.wikidata.org/wiki/Q860554","display_name":"Metric (unit)","level":2,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C105795698","wikidata":"https://www.wikidata.org/wiki/Q12483","display_name":"Statistics","level":1,"score":0.0},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.0},{"id":"https://openalex.org/C111472728","wikidata":"https://www.wikidata.org/wiki/Q9471","display_name":"Epistemology","level":1,"score":0.0},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0},{"id":"https://openalex.org/C21547014","wikidata":"https://www.wikidata.org/wiki/Q1423657","display_name":"Operations management","level":1,"score":0.0}],"mesh":[],"locations_count":7,"locations":[{"id":"doi:10.1186/s13321-022-00672-x","is_oa":true,"landing_page_url":"https://doi.org/10.1186/s13321-022-00672-x","pdf_url":"https://jcheminf.biomedcentral.com/counter/pdf/10.1186/s13321-022-00672-x","source":{"id":"https://openalex.org/S180838163","display_name":"Journal of Cheminformatics","issn_l":"1758-2946","issn":["1758-2946"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310320256","host_organization_name":"BioMed Central","host_organization_lineage":["https://openalex.org/P4310320256","https://openalex.org/P4310319965"],"host_organization_lineage_names":["BioMed Central","Springer Nature"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Journal of Cheminformatics","raw_type":"journal-article"},{"id":"pmid:36609528","is_oa":false,"landing_page_url":"https://pubmed.ncbi.nlm.nih.gov/36609528","pdf_url":null,"source":{"id":"https://openalex.org/S4306525036","display_name":"PubMed","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1299303238","host_organization_name":"National Institutes of Health","host_organization_lineage":["https://openalex.org/I1299303238"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Journal of cheminformatics","raw_type":null},{"id":"pmh:oai:pubmedcentral.nih.gov:9824924","is_oa":true,"landing_page_url":"https://www.ncbi.nlm.nih.gov/pmc/articles/9824924","pdf_url":"https://pmc.ncbi.nlm.nih.gov/articles/PMC9824924/pdf/13321_2022_Article_672.pdf","source":{"id":"https://openalex.org/S2764455111","display_name":"PubMed Central","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1299303238","host_organization_name":"National Institutes of Health","host_organization_lineage":["https://openalex.org/I1299303238"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"J Cheminform","raw_type":"Text"},{"id":"pmh:oai:scholarlypublications.universiteitleiden.nl:item_3567189","is_oa":true,"landing_page_url":"https://hdl.handle.net/1887/3567189","pdf_url":"https://scholarlypublications.universiteitleiden.nl/access/item%3A3567190/view","source":{"id":"https://openalex.org/S4306400850","display_name":"Leiden Repository (Leiden University)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I121797337","host_organization_name":"Leiden University","host_organization_lineage":["https://openalex.org/I121797337"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Journal of cheminformatics","raw_type":"Article / Letter to editor"},{"id":"pmh:oai:doaj.org/article:90fdc489b52b4c0c9542e9b0522dafdb","is_oa":true,"landing_page_url":"https://doaj.org/article/90fdc489b52b4c0c9542e9b0522dafdb","pdf_url":null,"source":{"id":"https://openalex.org/S112646816","display_name":"SHILAP Revista de lepidopterolog\u00eda","issn_l":"0300-5267","issn":["0300-5267","2340-4078"],"is_oa":true,"is_in_doaj":true,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Journal of Cheminformatics, Vol 15, Iss 1, Pp 1-11 (2023)","raw_type":"article"},{"id":"pmh:oai:scholarlypublications.universiteitleiden.nl:item_4297138","is_oa":true,"landing_page_url":"https://hdl.handle.net/1887/4297138","pdf_url":null,"source":{"id":"https://openalex.org/S4306400850","display_name":"Leiden Repository (Leiden University)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I121797337","host_organization_name":"Leiden University","host_organization_lineage":["https://openalex.org/I121797337"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Journal of Cheminformatics","raw_type":"Text"},{"id":"pmh:ul:oai:scholarlypublications.universiteitleiden.nl:item_3567189","is_oa":true,"landing_page_url":"http://hdl.handle.net/1887/3567189","pdf_url":null,"source":{"id":"https://openalex.org/S4306401843","display_name":"Data Archiving and Networked Services (DANS)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1322597698","host_organization_name":"Royal Netherlands Academy of Arts and Sciences","host_organization_lineage":["https://openalex.org/I1322597698"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Journal of cheminformatics, 15(1):3","raw_type":"info:eu-repo/semantics/article"}],"best_oa_location":{"id":"doi:10.1186/s13321-022-00672-x","is_oa":true,"landing_page_url":"https://doi.org/10.1186/s13321-022-00672-x","pdf_url":"https://jcheminf.biomedcentral.com/counter/pdf/10.1186/s13321-022-00672-x","source":{"id":"https://openalex.org/S180838163","display_name":"Journal of Cheminformatics","issn_l":"1758-2946","issn":["1758-2946"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310320256","host_organization_name":"BioMed Central","host_organization_lineage":["https://openalex.org/P4310320256","https://openalex.org/P4310319965"],"host_organization_lineage_names":["BioMed Central","Springer Nature"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Journal of Cheminformatics","raw_type":"journal-article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":true,"grobid_xml":false},"content_urls":{"pdf":"https://content.openalex.org/works/W4313597804.pdf"},"referenced_works_count":54,"referenced_works":["https://openalex.org/W959778778","https://openalex.org/W1508604947","https://openalex.org/W1964513093","https://openalex.org/W1970197319","https://openalex.org/W1975875968","https://openalex.org/W1991238353","https://openalex.org/W1993046136","https://openalex.org/W2010893565","https://openalex.org/W2019678805","https://openalex.org/W2026091184","https://openalex.org/W2035585923","https://openalex.org/W2061789405","https://openalex.org/W2062450879","https://openalex.org/W2092285329","https://openalex.org/W2096560421","https://openalex.org/W2122025333","https://openalex.org/W2123676318","https://openalex.org/W2130479394","https://openalex.org/W2153693853","https://openalex.org/W2169678694","https://openalex.org/W2204695023","https://openalex.org/W2211193336","https://openalex.org/W2295598076","https://openalex.org/W2432967145","https://openalex.org/W2473190403","https://openalex.org/W2494869284","https://openalex.org/W2556014018","https://openalex.org/W2580289839","https://openalex.org/W2593632281","https://openalex.org/W2740946158","https://openalex.org/W2773242491","https://openalex.org/W2791355014","https://openalex.org/W2803094965","https://openalex.org/W2901476322","https://openalex.org/W2905012389","https://openalex.org/W2920724574","https://openalex.org/W2970971581","https://openalex.org/W2979238556","https://openalex.org/W2980789587","https://openalex.org/W3008588639","https://openalex.org/W3015572666","https://openalex.org/W3023189783","https://openalex.org/W3037386608","https://openalex.org/W3082081167","https://openalex.org/W3104508774","https://openalex.org/W3127297964","https://openalex.org/W3133989558","https://openalex.org/W3213647524","https://openalex.org/W4210702584","https://openalex.org/W4220904544","https://openalex.org/W4231101093","https://openalex.org/W4313597804","https://openalex.org/W6891685257","https://openalex.org/W6910027793"],"related_works":["https://openalex.org/W962911587","https://openalex.org/W2270762093","https://openalex.org/W3126834064","https://openalex.org/W4200551113","https://openalex.org/W2028861106","https://openalex.org/W4255072332","https://openalex.org/W1754154538","https://openalex.org/W3169246587","https://openalex.org/W4253714063","https://openalex.org/W2984010599"],"abstract_inverted_index":{"With":[0],"the":[1,52,62,84],"ongoing":[2],"rapid":[3],"growth":[4],"of":[5,15,26,37,45,59,90,144,151],"publicly":[6,101],"available":[7,102],"ligand-protein":[8],"bioactivity":[9],"data,":[10],"there":[11],"is":[12,33,48,88,129,161],"a":[13,24,42,66,72,126,142,168],"trove":[14],"valuable":[16],"data":[17,32,53,64,94,119,137,165,184],"that":[18,128,162,171],"can":[19,69,138,172],"be":[20,71,139,173],"used":[21,174],"to":[22,50,54],"train":[23],"plethora":[25],"machine-learning":[27],"algorithms.":[28],"However,":[29],"not":[30],"all":[31],"equal":[34],"in":[35,125,141],"terms":[36],"size":[38],"and":[39,41,107,123,146,156],"quality":[40],"significant":[43],"portion":[44],"researchers'":[46],"time":[47],"needed":[49],"adapt":[51],"their":[55],"needs.":[56],"On":[57],"top":[58],"that,":[60],"finding":[61],"right":[63],"for":[65,131,175,186],"research":[67],"question":[68],"often":[70],"challenge":[73],"on":[74],"its":[75],"own.":[76],"To":[77],"meet":[78],"these":[79],"challenges,":[80],"we":[81],"have":[82],"constructed":[83],"Papyrus":[85,87],"dataset.":[86],"comprised":[89],"around":[91],"60":[92],"million":[93],"points.":[95],"This":[96],"dataset":[97],"contains":[98],"multiple":[99],"large":[100],"datasets":[103,113],"such":[104],"as":[105],"ChEMBL":[106],"ExCAPE-DB":[108],"combined":[109],"with":[110],"several":[111],"smaller":[112],"containing":[114],"high-quality":[115],"data.":[116],"The":[117],"aggregated":[118],"has":[120],"been":[121],"standardised":[122],"normalised":[124],"manner":[127],"suitable":[130],"machine":[132],"learning.":[133],"We":[134],"show":[135],"how":[136],"filtered":[140],"variety":[143],"ways":[145],"also":[147,180],"perform":[148],"some":[149],"examples":[150],"quantitative":[152],"structure-activity":[153],"relationship":[154],"analyses":[155],"proteochemometric":[157],"modelling.":[158],"Our":[159],"ambition":[160],"this":[163],"pruned":[164],"collection":[166],"constitutes":[167],"benchmark":[169],"set":[170],"constructing":[176],"predictive":[177],"models,":[178],"while":[179],"providing":[181],"an":[182],"accessible":[183],"source":[185],"research.":[187]},"counts_by_year":[{"year":2026,"cited_by_count":7},{"year":2025,"cited_by_count":17},{"year":2024,"cited_by_count":26},{"year":2023,"cited_by_count":25}],"updated_date":"2026-04-21T08:09:41.155169","created_date":"2025-10-10T00:00:00"}
