{"id":"https://openalex.org/W6907278982","doi":"https://doi.org/10.21227/gq2v-8k24","title":"Dataset for Reducing Ensembles of Protein Tertiary Structures Generated De Novo via Clustering","display_name":"Dataset for Reducing Ensembles of Protein Tertiary Structures Generated De Novo via Clustering","publication_year":2020,"publication_date":"2020-04-17","ids":{"openalex":"https://openalex.org/W6907278982","doi":"https://doi.org/10.21227/gq2v-8k24"},"language":"en","primary_location":{"id":"doi:10.21227/gq2v-8k24","is_oa":true,"landing_page_url":"https://doi.org/10.21227/gq2v-8k24","pdf_url":null,"source":{"id":"https://openalex.org/S7407051695","display_name":"IEEE DataPort","issn_l":null,"issn":[],"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"dataset"},"type":"dataset","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.21227/gq2v-8k24","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":null,"display_name":"Zaman, Ahmed Bin","orcid":null},"institutions":[{"id":"https://openalex.org/I162714631","display_name":"George Mason University","ror":"https://ror.org/02jqj7156","country_code":"US","type":"education","lineage":["https://openalex.org/I162714631"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Zaman, Ahmed Bin","raw_affiliation_strings":["George Mason University"],"affiliations":[{"raw_affiliation_string":"George Mason University","institution_ids":["https://openalex.org/I162714631"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Kamranfar, Parastoo","orcid":null},"institutions":[{"id":"https://openalex.org/I162714631","display_name":"George Mason University","ror":"https://ror.org/02jqj7156","country_code":"US","type":"education","lineage":["https://openalex.org/I162714631"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Kamranfar, Parastoo","raw_affiliation_strings":["George Mason University"],"affiliations":[{"raw_affiliation_string":"George Mason University","institution_ids":["https://openalex.org/I162714631"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Domeniconi, Carlotta","orcid":null},"institutions":[{"id":"https://openalex.org/I162714631","display_name":"George Mason University","ror":"https://ror.org/02jqj7156","country_code":"US","type":"education","lineage":["https://openalex.org/I162714631"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Domeniconi, Carlotta","raw_affiliation_strings":["George Mason University"],"affiliations":[{"raw_affiliation_string":"George Mason University","institution_ids":["https://openalex.org/I162714631"]}]},{"author_position":"last","author":{"id":null,"display_name":"Shehu, Amarda","orcid":null},"institutions":[{"id":"https://openalex.org/I162714631","display_name":"George Mason University","ror":"https://ror.org/02jqj7156","country_code":"US","type":"education","lineage":["https://openalex.org/I162714631"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Shehu, Amarda","raw_affiliation_strings":["George Mason University"],"affiliations":[{"raw_affiliation_string":"George Mason University","institution_ids":["https://openalex.org/I162714631"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":[],"corresponding_institution_ids":["https://openalex.org/I162714631"],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":true,"primary_topic":null,"topics":[],"keywords":[{"id":"https://openalex.org/keywords/cluster-analysis","display_name":"Cluster analysis","score":0.6535999774932861},{"id":"https://openalex.org/keywords/protein-structure","display_name":"Protein structure","score":0.4602999985218048},{"id":"https://openalex.org/keywords/protein-tertiary-structure","display_name":"Protein tertiary structure","score":0.4489000141620636},{"id":"https://openalex.org/keywords/protein-structure-prediction","display_name":"Protein structure prediction","score":0.40529999136924744},{"id":"https://openalex.org/keywords/fragment","display_name":"Fragment (logic)","score":0.35929998755455017},{"id":"https://openalex.org/keywords/structural-alignment","display_name":"Structural alignment","score":0.3546000123023987},{"id":"https://openalex.org/keywords/rule-of-thumb","display_name":"Rule of thumb","score":0.349700003862381}],"concepts":[{"id":"https://openalex.org/C73555534","wikidata":"https://www.wikidata.org/wiki/Q622825","display_name":"Cluster analysis","level":2,"score":0.6535999774932861},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6430000066757202},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.48330000042915344},{"id":"https://openalex.org/C47701112","wikidata":"https://www.wikidata.org/wiki/Q735188","display_name":"Protein structure","level":2,"score":0.4602999985218048},{"id":"https://openalex.org/C75599170","wikidata":"https://www.wikidata.org/wiki/Q898483","display_name":"Protein tertiary structure","level":2,"score":0.4489000141620636},{"id":"https://openalex.org/C18051474","wikidata":"https://www.wikidata.org/wiki/Q899656","display_name":"Protein structure prediction","level":3,"score":0.40529999136924744},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.3971000015735626},{"id":"https://openalex.org/C2776235265","wikidata":"https://www.wikidata.org/wiki/Q18392052","display_name":"Fragment (logic)","level":2,"score":0.35929998755455017},{"id":"https://openalex.org/C4668613","wikidata":"https://www.wikidata.org/wiki/Q4116110","display_name":"Structural alignment","level":5,"score":0.3546000123023987},{"id":"https://openalex.org/C89246107","wikidata":"https://www.wikidata.org/wiki/Q1398821","display_name":"Rule of thumb","level":2,"score":0.349700003862381},{"id":"https://openalex.org/C162319229","wikidata":"https://www.wikidata.org/wiki/Q175263","display_name":"Data structure","level":2,"score":0.33340001106262207},{"id":"https://openalex.org/C2778572836","wikidata":"https://www.wikidata.org/wiki/Q380933","display_name":"Space (punctuation)","level":2,"score":0.29660001397132874},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.2896000146865845},{"id":"https://openalex.org/C70721500","wikidata":"https://www.wikidata.org/wiki/Q177005","display_name":"Computational biology","level":1,"score":0.2842999994754791},{"id":"https://openalex.org/C136475424","wikidata":"https://www.wikidata.org/wiki/Q7251500","display_name":"Protein structure database","level":4,"score":0.28349998593330383},{"id":"https://openalex.org/C2779530757","wikidata":"https://www.wikidata.org/wiki/Q1207505","display_name":"Quality (philosophy)","level":2,"score":0.2815000116825104},{"id":"https://openalex.org/C125411270","wikidata":"https://www.wikidata.org/wiki/Q18653","display_name":"Encoding (memory)","level":2,"score":0.2703000009059906},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.26910001039505005},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.26030001044273376}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.21227/gq2v-8k24","is_oa":true,"landing_page_url":"https://doi.org/10.21227/gq2v-8k24","pdf_url":null,"source":{"id":"https://openalex.org/S7407051695","display_name":"IEEE DataPort","issn_l":null,"issn":[],"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"dataset"}],"best_oa_location":{"id":"doi:10.21227/gq2v-8k24","is_oa":true,"landing_page_url":"https://doi.org/10.21227/gq2v-8k24","pdf_url":null,"source":{"id":"https://openalex.org/S7407051695","display_name":"IEEE DataPort","issn_l":null,"issn":[],"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"dataset"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Controlling":[0],"the":[1,41,48,113,117,143,146,157,160],"quality":[2],"of":[3,22,64,80,104,116],"tertiary":[4],"structures":[5,29,39,65,106,115],"computed":[6],"for":[7,121,128,139],"a":[8,12,61,92],"protein":[9,17,118],"molecule":[10],"remains":[11],"central":[13],"challenge":[14],"in":[15,145],"de-novo":[16],"structure":[18,130],"prediction.":[19],"The":[20,148,166],"rule":[21],"thumb":[23],"is":[24,58,73,150,164],"to":[25,99,159],"generate":[26],"as":[27,30],"many":[28],"can":[31,172],"be":[32,173],"afforded,":[33],"effectively":[34],"acknowledging":[35],"that":[36,43,59],"having":[37],"more":[38],"increases":[40],"likelihood":[42],"some":[44],"will":[45,155],"reside":[46],"near":[47],"sought":[49],"biologically-active":[50,114],"structure.":[51],"A":[52],"major":[53],"drawback":[54],"with":[55,75,169],"this":[56,170],"approach":[57,95],"computing":[60],"large":[62],"number":[63],"imposes":[66],"time":[67],"and":[68,136,141,153],"space":[69],"costs.":[70],"This":[71,110],"dataset":[72,111,171],"associated":[74,168],"our":[76],"paper,":[77],"\"Reducing":[78],"Ensembles":[79],"Protein":[81],"Tertiary":[82],"Structures":[83],"Generated":[84],"De":[85],"Novo":[86],"via":[87],"Clustering\",":[88],"where":[89],"we":[90,97,154],"propose":[91],"novel":[93],"clustering-based":[94],"which":[96],"demonstrate":[98],"significantly":[100],"reduce":[101],"an":[102],"ensemble":[103],"generated":[105,132],"without":[107],"sacrificing":[108],"quality.":[109],"provides":[112],"targets":[119],"used":[120],"evaluation,":[122],"necessary":[123,138],"data":[124,137],"(sequence,":[125],"fragment":[126],"files)":[127],"generating":[129,140],"ensembles,":[131,133,135],"reduced":[134],"plotting":[142],"results":[144],"paper.":[147],"paper":[149,161],"under":[151],"review":[152],"update":[156],"link":[158],"once":[162],"it":[163],"published.":[165],"codes":[167],"found":[174],"in,":[175],"https://github.com/ahmed0804/ReducedDecoyPool":[176]},"counts_by_year":[],"updated_date":"2025-11-06T06:51:31.235846","created_date":"2025-10-10T00:00:00"}
