{"id":"https://openalex.org/W4226054611","doi":"https://doi.org/10.1186/s13321-021-00576-2","title":"Splitting chemical structure data sets for federated privacy-preserving machine learning","display_name":"Splitting chemical structure data sets for federated privacy-preserving machine learning","publication_year":2021,"publication_date":"2021-12-01","ids":{"openalex":"https://openalex.org/W4226054611","doi":"https://doi.org/10.1186/s13321-021-00576-2","pmid":"https://pubmed.ncbi.nlm.nih.gov/34876230"},"language":"en","primary_location":{"id":"doi:10.1186/s13321-021-00576-2","is_oa":true,"landing_page_url":"https://doi.org/10.1186/s13321-021-00576-2","pdf_url":"https://jcheminf.biomedcentral.com/track/pdf/10.1186/s13321-021-00576-2","source":{"id":"https://openalex.org/S180838163","display_name":"Journal of Cheminformatics","issn_l":"1758-2946","issn":["1758-2946"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310320256","host_organization_name":"BioMed Central","host_organization_lineage":["https://openalex.org/P4310320256","https://openalex.org/P4310319965"],"host_organization_lineage_names":["BioMed Central","Springer Nature"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Journal of Cheminformatics","raw_type":"journal-article"},"type":"article","indexed_in":["crossref","doaj","pubmed"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://jcheminf.biomedcentral.com/track/pdf/10.1186/s13321-021-00576-2","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5048408288","display_name":"Jaak Simm","orcid":"https://orcid.org/0000-0002-5543-993X"},"institutions":[{"id":"https://openalex.org/I99464096","display_name":"KU Leuven","ror":"https://ror.org/05f950310","country_code":"BE","type":"education","lineage":["https://openalex.org/I99464096"]}],"countries":["BE"],"is_corresponding":true,"raw_author_name":"Jaak Simm","raw_affiliation_strings":["KU Leuven, ESAT-STADIUS, Kasteelpark Arenberg 10, 3001, Heverlee, Belgium"],"affiliations":[{"raw_affiliation_string":"KU Leuven, ESAT-STADIUS, Kasteelpark Arenberg 10, 3001, Heverlee, Belgium","institution_ids":["https://openalex.org/I99464096"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5015053198","display_name":"Lina Humbeck","orcid":"https://orcid.org/0000-0003-3151-9158"},"institutions":[{"id":"https://openalex.org/I1330995197","display_name":"Boehringer Ingelheim (Germany)","ror":"https://ror.org/00q32j219","country_code":"DE","type":"company","lineage":["https://openalex.org/I1330995197"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Lina Humbeck","raw_affiliation_strings":["Medicinal Chemistry Department, Boehringer Ingelheim Pharma GmbH & Co. KG, Birkendorfer Str. 65, 88397, Biberach an der Riss, Germany"],"affiliations":[{"raw_affiliation_string":"Medicinal Chemistry Department, Boehringer Ingelheim Pharma GmbH & Co. KG, Birkendorfer Str. 65, 88397, Biberach an der Riss, Germany","institution_ids":["https://openalex.org/I1330995197"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5063226863","display_name":"Adam Zalewski","orcid":"https://orcid.org/0000-0002-8341-9565"},"institutions":[{"id":"https://openalex.org/I4210121112","display_name":"Amgen (Germany)","ror":"https://ror.org/02ezy5072","country_code":"DE","type":"company","lineage":["https://openalex.org/I1320553840","https://openalex.org/I4210121112"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Adam Zalewski","raw_affiliation_strings":["Amgen Research (Munich) GmbH, Staffelseestra\u00dfe 2, 81477, Munich, Germany"],"affiliations":[{"raw_affiliation_string":"Amgen Research (Munich) GmbH, Staffelseestra\u00dfe 2, 81477, Munich, Germany","institution_ids":["https://openalex.org/I4210121112"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5066956474","display_name":"No\u00e9 Sturm","orcid":"https://orcid.org/0000-0002-9775-7872"},"institutions":[{"id":"https://openalex.org/I1283582996","display_name":"Novartis (Switzerland)","ror":"https://ror.org/02f9zrr09","country_code":"CH","type":"company","lineage":["https://openalex.org/I1283582996"]},{"id":"https://openalex.org/I4400600974","display_name":"Novartis Institutes for BioMedical Research","ror":"https://ror.org/053gv2m95","country_code":null,"type":"funder","lineage":["https://openalex.org/I1283582996","https://openalex.org/I4400600974"]}],"countries":["CH"],"is_corresponding":false,"raw_author_name":"Noe Sturm","raw_affiliation_strings":["Novartis Institutes for BioMedical Research, Novartis Campus, CH-4002, Basel, Switzerland"],"affiliations":[{"raw_affiliation_string":"Novartis Institutes for BioMedical Research, Novartis Campus, CH-4002, Basel, Switzerland","institution_ids":["https://openalex.org/I1283582996","https://openalex.org/I4400600974"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5014831578","display_name":"Wouter Heyndrickx","orcid":"https://orcid.org/0000-0002-0809-9442"},"institutions":[{"id":"https://openalex.org/I137982388","display_name":"Janssen (Belgium)","ror":"https://ror.org/04yzcpd71","country_code":"BE","type":"company","lineage":["https://openalex.org/I1330063522","https://openalex.org/I137982388"]}],"countries":["BE"],"is_corresponding":false,"raw_author_name":"Wouter Heyndrickx","raw_affiliation_strings":["Janssen Pharmaceutica N.V., Janssen Pharmaceutica, Turnhoutseweg 30, 2340, Beerse, Belgium"],"affiliations":[{"raw_affiliation_string":"Janssen Pharmaceutica N.V., Janssen Pharmaceutica, Turnhoutseweg 30, 2340, Beerse, Belgium","institution_ids":["https://openalex.org/I137982388"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5041984163","display_name":"Yves Moreau","orcid":"https://orcid.org/0000-0002-4647-6560"},"institutions":[{"id":"https://openalex.org/I99464096","display_name":"KU Leuven","ror":"https://ror.org/05f950310","country_code":"BE","type":"education","lineage":["https://openalex.org/I99464096"]}],"countries":["BE"],"is_corresponding":false,"raw_author_name":"Yves Moreau","raw_affiliation_strings":["KU Leuven, ESAT-STADIUS, Kasteelpark Arenberg 10, 3001, Heverlee, Belgium"],"affiliations":[{"raw_affiliation_string":"KU Leuven, ESAT-STADIUS, Kasteelpark Arenberg 10, 3001, Heverlee, Belgium","institution_ids":["https://openalex.org/I99464096"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5078568475","display_name":"Bernd Beck","orcid":null},"institutions":[{"id":"https://openalex.org/I1330995197","display_name":"Boehringer Ingelheim (Germany)","ror":"https://ror.org/00q32j219","country_code":"DE","type":"company","lineage":["https://openalex.org/I1330995197"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Bernd Beck","raw_affiliation_strings":["Medicinal Chemistry Department, Boehringer Ingelheim Pharma GmbH & Co. KG, Birkendorfer Str. 65, 88397, Biberach an der Riss, Germany"],"affiliations":[{"raw_affiliation_string":"Medicinal Chemistry Department, Boehringer Ingelheim Pharma GmbH & Co. KG, Birkendorfer Str. 65, 88397, Biberach an der Riss, Germany","institution_ids":["https://openalex.org/I1330995197"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5029464406","display_name":"Ansgar Schuffenhauer","orcid":"https://orcid.org/0000-0001-6385-0414"},"institutions":[{"id":"https://openalex.org/I1283582996","display_name":"Novartis (Switzerland)","ror":"https://ror.org/02f9zrr09","country_code":"CH","type":"company","lineage":["https://openalex.org/I1283582996"]},{"id":"https://openalex.org/I4400600974","display_name":"Novartis Institutes for BioMedical Research","ror":"https://ror.org/053gv2m95","country_code":null,"type":"funder","lineage":["https://openalex.org/I1283582996","https://openalex.org/I4400600974"]}],"countries":["CH"],"is_corresponding":false,"raw_author_name":"Ansgar Schuffenhauer","raw_affiliation_strings":["Novartis Institutes for BioMedical Research, Novartis Campus, CH-4002, Basel, Switzerland. ansgar.schuffenhauer@novartis.com","Novartis Institutes for BioMedical Research, Novartis Campus, CH-4002, Basel, Switzerland"],"affiliations":[{"raw_affiliation_string":"Novartis Institutes for BioMedical Research, Novartis Campus, CH-4002, Basel, Switzerland. ansgar.schuffenhauer@novartis.com","institution_ids":[]},{"raw_affiliation_string":"Novartis Institutes for BioMedical Research, Novartis Campus, CH-4002, Basel, Switzerland","institution_ids":["https://openalex.org/I1283582996","https://openalex.org/I4400600974"]}]}],"institutions":[],"countries_distinct_count":3,"institutions_distinct_count":8,"corresponding_author_ids":["https://openalex.org/A5048408288"],"corresponding_institution_ids":["https://openalex.org/I99464096"],"apc_list":{"value":1290,"currency":"GBP","value_usd":1582},"apc_paid":{"value":1290,"currency":"GBP","value_usd":1582},"fwci":4.1263,"has_fulltext":true,"cited_by_count":58,"citation_normalized_percentile":{"value":0.95153997,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":89,"max":100},"biblio":{"volume":"13","issue":"1","first_page":"96","last_page":"96"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10180","display_name":"Analytical chemistry methods development","score":0.986299991607666,"subfield":{"id":"https://openalex.org/subfields/1602","display_name":"Analytical Chemistry"},"field":{"id":"https://openalex.org/fields/16","display_name":"Chemistry"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10180","display_name":"Analytical chemistry methods development","score":0.986299991607666,"subfield":{"id":"https://openalex.org/subfields/1602","display_name":"Analytical Chemistry"},"field":{"id":"https://openalex.org/fields/16","display_name":"Chemistry"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11948","display_name":"Machine Learning in Materials Science","score":0.9815000295639038,"subfield":{"id":"https://openalex.org/subfields/2505","display_name":"Materials Chemistry"},"field":{"id":"https://openalex.org/fields/25","display_name":"Materials Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10211","display_name":"Computational Drug Discovery Methods","score":0.98089998960495,"subfield":{"id":"https://openalex.org/subfields/1703","display_name":"Computational Theory and Mathematics"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7769298553466797},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.4840894937515259},{"id":"https://openalex.org/keywords/data-science","display_name":"Data science","score":0.3818717300891876}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7769298553466797},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.4840894937515259},{"id":"https://openalex.org/C2522767166","wikidata":"https://www.wikidata.org/wiki/Q2374463","display_name":"Data science","level":1,"score":0.3818717300891876}],"mesh":[],"locations_count":5,"locations":[{"id":"doi:10.1186/s13321-021-00576-2","is_oa":true,"landing_page_url":"https://doi.org/10.1186/s13321-021-00576-2","pdf_url":"https://jcheminf.biomedcentral.com/track/pdf/10.1186/s13321-021-00576-2","source":{"id":"https://openalex.org/S180838163","display_name":"Journal of Cheminformatics","issn_l":"1758-2946","issn":["1758-2946"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310320256","host_organization_name":"BioMed Central","host_organization_lineage":["https://openalex.org/P4310320256","https://openalex.org/P4310319965"],"host_organization_lineage_names":["BioMed Central","Springer Nature"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Journal of Cheminformatics","raw_type":"journal-article"},{"id":"pmid:34876230","is_oa":false,"landing_page_url":"https://pubmed.ncbi.nlm.nih.gov/34876230","pdf_url":null,"source":{"id":"https://openalex.org/S4306525036","display_name":"PubMed","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1299303238","host_organization_name":"National Institutes of Health","host_organization_lineage":["https://openalex.org/I1299303238"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Journal of cheminformatics","raw_type":null},{"id":"pmh:oai:lirias2repo.kuleuven.be:20.500.12942/686163","is_oa":true,"landing_page_url":"https://lirias.kuleuven.be/handle/20.500.12942/686163","pdf_url":null,"source":{"id":"https://openalex.org/S7407055369","display_name":"Lirias","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"acceptedVersion","is_accepted":true,"is_published":false,"raw_source_name":"Journal Of Cheminformatics, vol. 13 (1), Art.No. ARTN 96","raw_type":"info:eu-repo/semantics/publishedVersion"},{"id":"pmh:oai:doaj.org/article:c8f2a73d49b048ff969a9b0819f495f9","is_oa":true,"landing_page_url":"https://doaj.org/article/c8f2a73d49b048ff969a9b0819f495f9","pdf_url":null,"source":{"id":"https://openalex.org/S112646816","display_name":"SHILAP Revista de lepidopterolog\u00eda","issn_l":"0300-5267","issn":["0300-5267","2340-4078"],"is_oa":true,"is_in_doaj":true,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"journal"},"license":"cc-by-sa","license_id":"https://openalex.org/licenses/cc-by-sa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Journal of Cheminformatics, Vol 13, Iss 1, Pp 1-14 (2021)","raw_type":"article"},{"id":"pmh:oai:pubmedcentral.nih.gov:8650276","is_oa":true,"landing_page_url":"https://www.ncbi.nlm.nih.gov/pmc/articles/8650276","pdf_url":null,"source":{"id":"https://openalex.org/S2764455111","display_name":"PubMed Central","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1299303238","host_organization_name":"National Institutes of Health","host_organization_lineage":["https://openalex.org/I1299303238"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"J Cheminform","raw_type":"Text"}],"best_oa_location":{"id":"doi:10.1186/s13321-021-00576-2","is_oa":true,"landing_page_url":"https://doi.org/10.1186/s13321-021-00576-2","pdf_url":"https://jcheminf.biomedcentral.com/track/pdf/10.1186/s13321-021-00576-2","source":{"id":"https://openalex.org/S180838163","display_name":"Journal of Cheminformatics","issn_l":"1758-2946","issn":["1758-2946"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310320256","host_organization_name":"BioMed Central","host_organization_lineage":["https://openalex.org/P4310320256","https://openalex.org/P4310319965"],"host_organization_lineage_names":["BioMed Central","Springer Nature"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Journal of Cheminformatics","raw_type":"journal-article"},"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/10","score":0.550000011920929,"display_name":"Reduced inequalities"}],"awards":[{"id":"https://openalex.org/G4956428346","display_name":null,"funder_award_id":"Horizon 2020 research and innovatio","funder_id":"https://openalex.org/F4320320300","funder_display_name":"European Commission"},{"id":"https://openalex.org/G5036817778","display_name":null,"funder_award_id":"European Union's Horizon 2020 research and innov","funder_id":"https://openalex.org/F4320320300","funder_display_name":"European Commission"},{"id":"https://openalex.org/G5180414528","display_name":null,"funder_award_id":"Innovative Medicines Initiative 2 Joint Undertakin","funder_id":"https://openalex.org/F4320320300","funder_display_name":"European Commission"},{"id":"https://openalex.org/G6366719553","display_name":null,"funder_award_id":"831472","funder_id":"https://openalex.org/F4320326631","funder_display_name":"Innovative Medicines Initiative"},{"id":"https://openalex.org/G7171503580","display_name":null,"funder_award_id":"831472","funder_id":"https://openalex.org/F4320320300","funder_display_name":"European Commission"},{"id":"https://openalex.org/G8318064016","display_name":null,"funder_award_id":"Horizon","funder_id":"https://openalex.org/F4320320300","funder_display_name":"European Commission"},{"id":"https://openalex.org/G8633428685","display_name":null,"funder_award_id":"European Union's Horizon 2020 research and innovat","funder_id":"https://openalex.org/F4320320300","funder_display_name":"European Commission"}],"funders":[{"id":"https://openalex.org/F4320314178","display_name":"European Federation of Pharmaceutical Industries and Associations","ror":"https://ror.org/00g1x4v36"},{"id":"https://openalex.org/F4320320300","display_name":"European Commission","ror":"https://ror.org/00k4n6c32"},{"id":"https://openalex.org/F4320326631","display_name":"Innovative Medicines Initiative","ror":"https://ror.org/019af4n30"}],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4226054611.pdf","grobid_xml":"https://content.openalex.org/works/W4226054611.grobid-xml"},"referenced_works_count":24,"referenced_works":["https://openalex.org/W188370263","https://openalex.org/W1723619723","https://openalex.org/W1978229532","https://openalex.org/W1984994707","https://openalex.org/W1988037271","https://openalex.org/W1995495408","https://openalex.org/W2019678805","https://openalex.org/W2046344452","https://openalex.org/W2060531713","https://openalex.org/W2078385636","https://openalex.org/W2179066627","https://openalex.org/W2204197091","https://openalex.org/W2397857137","https://openalex.org/W2558999090","https://openalex.org/W2613791735","https://openalex.org/W2714724074","https://openalex.org/W2767079719","https://openalex.org/W2912213068","https://openalex.org/W2966357564","https://openalex.org/W3036993656","https://openalex.org/W3097605476","https://openalex.org/W3155808134","https://openalex.org/W3216856705","https://openalex.org/W4248987559"],"related_works":["https://openalex.org/W4391375266","https://openalex.org/W2899084033","https://openalex.org/W2748952813","https://openalex.org/W2390279801","https://openalex.org/W4391913857","https://openalex.org/W2358668433","https://openalex.org/W4396701345","https://openalex.org/W2376932109","https://openalex.org/W2001405890","https://openalex.org/W4396696052"],"abstract_inverted_index":{"With":[0],"the":[1,15,50,53,60,105,154,174,184,201,217],"increase":[2],"in":[3,9,62,81,127,163,196,205,216],"applications":[4],"of":[5,17,29,38,120,148,183,200,207,219],"machine":[6,84],"learning":[7,85],"methods":[8,115,151],"drug":[10],"design":[11],"and":[12,24,45,74,124,168,176,192],"related":[13],"fields,":[14],"challenge":[16,31,67],"designing":[18],"sound":[19],"test":[20,46,54,175],"sets":[21],"becomes":[22],"more":[23,25,79],"prominent.":[26],"The":[27,180],"goal":[28],"this":[30,110],"is":[32,56,68,77,213],"to":[33,58,159],"have":[34],"a":[35,63,82,92,118,121,128],"realistic":[36],"split":[37],"chemical":[39,98],"structures":[40,99],"(compounds)":[41],"between":[42,104,173],"training,":[43],"validation":[44],"set":[47,55,123,178],"such":[48],"that":[49],"performance":[51,61],"on":[52],"meaningful":[57],"infer":[59],"prospective":[64],"application.":[65],"This":[66],"by":[69],"its":[70],"own":[71],"very":[72,214],"interesting":[73],"relevant,":[75],"but":[76],"even":[78],"complex":[80],"federated":[83,129,220],"approach":[86],"where":[87,97],"multiple":[88],"partners":[89],"jointly":[90],"train":[91],"model":[93],"under":[94],"privacy-preserving":[95,130,221],"conditions":[96],"must":[100],"not":[101],"be":[102],"shared":[103],"different":[106],"participating":[107],"parties.":[108],"In":[109],"work":[111],"we":[112,152],"discuss":[113],"three":[114],"which":[116],"provide":[117],"splitting":[119,150,199],"data":[122,169,202],"are":[125,186],"applicable":[126],"setting,":[131],"namely:":[132],"a.":[133,187],"locality-sensitive":[134],"hashing":[135],"(LSH),":[136],"b.":[137,204],"sphere":[138,189,210],"exclusion":[139,190,211],"clustering,":[140],"c.":[141],"scaffold-based":[142,193],"binning":[143,194],"(scaffold":[144],"network).":[145],"For":[146],"evaluation":[147],"these":[149],"consider":[153],"following":[155],"quality":[156,198],"criteria":[157],"(compared":[158],"random":[160],"splitting):":[161],"bias":[162],"prediction":[164],"performance,":[165],"classification":[166],"label":[167],"imbalance,":[170],"similarity":[171],"distance":[172],"training":[177],"compounds.":[179],"main":[181],"findings":[182],"paper":[185],"both":[188],"clustering":[191,212],"result":[195],"high":[197],"sets,":[203],"terms":[206],"compute":[208],"costs":[209],"expensive":[215],"case":[218],"setting.":[222]},"counts_by_year":[{"year":2026,"cited_by_count":2},{"year":2025,"cited_by_count":21},{"year":2024,"cited_by_count":15},{"year":2023,"cited_by_count":11},{"year":2022,"cited_by_count":8},{"year":2021,"cited_by_count":1}],"updated_date":"2026-03-20T23:20:44.827607","created_date":"2025-10-10T00:00:00"}
