{"id":"https://openalex.org/W4393607043","doi":"https://doi.org/10.5281/zenodo.5542201","title":"Data for \"Training data composition affects performance of protein structure analysis algorithms\" by A. Derry, K. A. Carpenter, &amp; R. B. Altman","display_name":"Data for \"Training data composition affects performance of protein structure analysis algorithms\" by A. Derry, K. A. Carpenter, &amp; R. B. Altman","publication_year":2021,"publication_date":"2021-09-30","ids":{"openalex":"https://openalex.org/W4393607043","doi":"https://doi.org/10.5281/zenodo.5542201"},"language":"en","primary_location":{"id":"pmh:oai:zenodo.org:5542201","is_oa":true,"landing_page_url":"https://zenodo.org/record/5542201","pdf_url":null,"source":{"id":"https://openalex.org/S4306400562","display_name":"Zenodo (CERN European Organization for Nuclear Research)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I67311998","host_organization_name":"European Organization for Nuclear Research","host_organization_lineage":["https://openalex.org/I67311998"],"host_organization_lineage_names":[],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"info:eu-repo/semantics/other"},"type":"dataset","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://zenodo.org/record/5542201","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5067436655","display_name":"Alexander Derry","orcid":"https://orcid.org/0000-0003-2076-1184"},"institutions":[{"id":"https://openalex.org/I97018004","display_name":"Stanford University","ror":"https://ror.org/00f54p054","country_code":"US","type":"education","lineage":["https://openalex.org/I97018004"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Derry, Alexander","raw_affiliation_strings":["Stanford University"],"affiliations":[{"raw_affiliation_string":"Stanford University","institution_ids":["https://openalex.org/I97018004"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5081232063","display_name":"Kristy A. Carpenter","orcid":"https://orcid.org/0000-0003-4570-5170"},"institutions":[{"id":"https://openalex.org/I97018004","display_name":"Stanford University","ror":"https://ror.org/00f54p054","country_code":"US","type":"education","lineage":["https://openalex.org/I97018004"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Carpenter, Kristy A.","raw_affiliation_strings":["Stanford University"],"affiliations":[{"raw_affiliation_string":"Stanford University","institution_ids":["https://openalex.org/I97018004"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5084043782","display_name":"Russ B. Altman","orcid":"https://orcid.org/0000-0003-3859-2905"},"institutions":[{"id":"https://openalex.org/I97018004","display_name":"Stanford University","ror":"https://ror.org/00f54p054","country_code":"US","type":"education","lineage":["https://openalex.org/I97018004"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Altman, Russ B.","raw_affiliation_strings":["Stanford University"],"affiliations":[{"raw_affiliation_string":"Stanford University","institution_ids":["https://openalex.org/I97018004"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5067436655"],"corresponding_institution_ids":["https://openalex.org/I97018004"],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10519","display_name":"Advanced Proteomics Techniques and Applications","score":0.9884999990463257,"subfield":{"id":"https://openalex.org/subfields/1607","display_name":"Spectroscopy"},"field":{"id":"https://openalex.org/fields/16","display_name":"Chemistry"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10519","display_name":"Advanced Proteomics Techniques and Applications","score":0.9884999990463257,"subfield":{"id":"https://openalex.org/subfields/1607","display_name":"Spectroscopy"},"field":{"id":"https://openalex.org/fields/16","display_name":"Chemistry"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10836","display_name":"Metabolomics and Mass Spectrometry Studies","score":0.977400004863739,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},{"id":"https://openalex.org/T11162","display_name":"Enzyme Structure and Function","score":0.9660999774932861,"subfield":{"id":"https://openalex.org/subfields/2505","display_name":"Materials Chemistry"},"field":{"id":"https://openalex.org/fields/25","display_name":"Materials Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.5433815121650696},{"id":"https://openalex.org/keywords/composition","display_name":"Composition (language)","score":0.45443740487098694},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.4346614181995392},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.3786616921424866},{"id":"https://openalex.org/keywords/combinatorics","display_name":"Combinatorics","score":0.35826119780540466},{"id":"https://openalex.org/keywords/art","display_name":"Art","score":0.19455933570861816}],"concepts":[{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.5433815121650696},{"id":"https://openalex.org/C40231798","wikidata":"https://www.wikidata.org/wiki/Q1333743","display_name":"Composition (language)","level":2,"score":0.45443740487098694},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.4346614181995392},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.3786616921424866},{"id":"https://openalex.org/C114614502","wikidata":"https://www.wikidata.org/wiki/Q76592","display_name":"Combinatorics","level":1,"score":0.35826119780540466},{"id":"https://openalex.org/C142362112","wikidata":"https://www.wikidata.org/wiki/Q735","display_name":"Art","level":0,"score":0.19455933570861816},{"id":"https://openalex.org/C124952713","wikidata":"https://www.wikidata.org/wiki/Q8242","display_name":"Literature","level":1,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"pmh:oai:zenodo.org:5542201","is_oa":true,"landing_page_url":"https://zenodo.org/record/5542201","pdf_url":null,"source":{"id":"https://openalex.org/S4306400562","display_name":"Zenodo (CERN European Organization for Nuclear Research)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I67311998","host_organization_name":"European Organization for Nuclear Research","host_organization_lineage":["https://openalex.org/I67311998"],"host_organization_lineage_names":[],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"info:eu-repo/semantics/other"},{"id":"doi:10.5281/zenodo.5542201","is_oa":true,"landing_page_url":"https://doi.org/10.5281/zenodo.5542201","pdf_url":null,"source":{"id":"https://openalex.org/S4306400562","display_name":"Zenodo (CERN European Organization for Nuclear Research)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I67311998","host_organization_name":"European Organization for Nuclear Research","host_organization_lineage":["https://openalex.org/I67311998"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"dataset"}],"best_oa_location":{"id":"pmh:oai:zenodo.org:5542201","is_oa":true,"landing_page_url":"https://zenodo.org/record/5542201","pdf_url":null,"source":{"id":"https://openalex.org/S4306400562","display_name":"Zenodo (CERN European Organization for Nuclear Research)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I67311998","host_organization_name":"European Organization for Nuclear Research","host_organization_lineage":["https://openalex.org/I67311998"],"host_organization_lineage_names":[],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"info:eu-repo/semantics/other"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W2748952813","https://openalex.org/W1979597421","https://openalex.org/W2007980826","https://openalex.org/W2051487156","https://openalex.org/W2061531152","https://openalex.org/W3002753104","https://openalex.org/W2077600819","https://openalex.org/W2142036596","https://openalex.org/W2072657027","https://openalex.org/W2600246793"],"abstract_inverted_index":{"<strong>Description</strong>":[0],"This":[1],"repository":[2],"contains":[3],"all":[4],"data":[5,9,37,119],"used":[6,132],"in":[7,19,55,69,85,96,104,225],"\"Training":[8,118],"composition":[10,120],"affects":[11,121],"performance":[12,122],"of":[13,39,51,123,153,155],"protein":[14,124,156],"structure":[15,125,157],"analysis":[16,126],"algorithms\",":[17,127],"published":[18],"the":[20,40,136],"Pacific":[21],"Symposium":[22],"on":[23,90],"Biocomputing":[24],"2022":[25],"by":[26],"A.":[27,30,109,112,200],"Derry,":[28],"K.":[29,111],"Carpenter,":[31],"&amp;":[32,114,147,181,209],"R.":[33,115],"B.":[34,116],"Altman.":[35],"The":[36,214],"consists":[38],"following":[41,137],"files:":[42],"ema_zenodo_data.tar.gz:":[43],"train,":[44,59,73],"validation,":[45,60,74],"and":[46,61,75,81,99,165,222],"test":[47,62,76],"splits":[48,63,77],"for":[49,64,78,187],"Estimation":[50],"Model":[52],"Accuracy":[53],"task,":[54,68,84],"LMDB":[56],"format":[57,71,88],"design_zenodo_data.tar.gz:":[58],"Protein":[65,189],"Sequence":[66],"Design":[67],"JSON":[70],"enz_cat_res_zenodo_data.tar.gz:":[72],"Catalytic":[79,215],"Residue":[80],"Enzyme":[82],"Prediction":[83],"TF":[86],"record":[87],"Details":[89],"dataset":[91],"construction":[92],"can":[93,101],"be":[94,102],"found":[95,103],"our":[97,105],"paper":[98],"dataloaders":[100],"Github":[106],"repo.":[107],"<strong>Reference</strong>":[108],"Derry*,":[110],"Carpenter*,":[113],"Altman,":[117],"2021.":[128],"<strong>Dataset":[129],"References</strong>":[130],"Datasets":[131],"were":[133],"derived":[134],"from":[135],"works:":[138],"Kryshtafovych,":[139],"A.,":[140],"Schwede,":[141],"T.,":[142],"Topf,":[143],"M.,":[144],"Fidelis,":[145],"K.,":[146,178],"Moult,":[148],"J.":[149,203,211],"(2019).":[150,184],"Critical":[151],"assessment":[152],"methods":[154],"prediction":[158],"(CASP)\u2014Round":[159],"XIII.":[160],"In":[161],"<em>Proteins:":[162],"Structure,":[163],"Function":[164],"Bioinformatics</em>":[166],"(Vol.":[167],"87,":[168],"Issue":[169],"12,":[170],"pp.":[171],"1011\u20131020).":[172],"https://doi.org/10.1002/prot.25823":[173],"Ingraham,":[174],"J.,":[175],"Garg,":[176],"V.":[177],"Barzilay,":[179],"R.,":[180,208],"Jaakkola,":[182],"T.":[183,199],"<em>Generative":[185],"Models":[186],"Graph-Based":[188],"Design</em>.":[190],"https://openreview.net/pdf?id=SJgxrLLKOE":[191],"Furnham,":[192],"N.,":[193],"Holliday,":[194],"G.":[195],"L.,":[196],"de":[197],"Beer,":[198],"P.,":[201],"Jacobsen,":[202],"O.":[204],"B.,":[205],"Pearson,":[206],"W.":[207],"Thornton,":[210],"M.":[212],"(2014).":[213],"Site":[216],"Atlas":[217],"2.0:":[218],"cataloging":[219],"catalytic":[220],"sites":[221],"residues":[223],"identified":[224],"enzymes.":[226],"<em>Nucleic":[227],"Acids":[228],"Research</em>,":[229],"<em>42":[230],"</em>(Database":[231],"issue),":[232],"D485\u2013D489.":[233]},"counts_by_year":[],"updated_date":"2026-03-10T16:38:18.471706","created_date":"2025-10-10T00:00:00"}
