{"id":"https://openalex.org/W4306831161","doi":"https://doi.org/10.1371/journal.pcbi.1010610","title":"DPCfam: Unsupervised protein family classification by Density Peak Clustering of large sequence datasets","display_name":"DPCfam: Unsupervised protein family classification by Density Peak Clustering of large sequence datasets","publication_year":2022,"publication_date":"2022-10-19","ids":{"openalex":"https://openalex.org/W4306831161","doi":"https://doi.org/10.1371/journal.pcbi.1010610","pmid":"https://pubmed.ncbi.nlm.nih.gov/36260616"},"language":"en","primary_location":{"id":"doi:10.1371/journal.pcbi.1010610","is_oa":true,"landing_page_url":"https://doi.org/10.1371/journal.pcbi.1010610","pdf_url":"https://journals.plos.org/ploscompbiol/article/file?id=10.1371/journal.pcbi.1010610&type=printable","source":{"id":"https://openalex.org/S86033158","display_name":"PLoS Computational Biology","issn_l":"1553-734X","issn":["1553-734X","1553-7358"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310315706","host_organization_name":"Public Library of Science","host_organization_lineage":["https://openalex.org/P4310315706"],"host_organization_lineage_names":["Public Library of Science"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"PLOS Computational Biology","raw_type":"journal-article"},"type":"article","indexed_in":["crossref","doaj","pubmed"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://journals.plos.org/ploscompbiol/article/file?id=10.1371/journal.pcbi.1010610&type=printable","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5041855360","display_name":"Elena Tea Russo","orcid":"https://orcid.org/0000-0002-0061-2328"},"institutions":[{"id":"https://openalex.org/I138549579","display_name":"Scuola Internazionale Superiore di Studi Avanzati","ror":"https://ror.org/004fze387","country_code":"IT","type":"education","lineage":["https://openalex.org/I138549579"]},{"id":"https://openalex.org/I2801626668","display_name":"AREA Science Park","ror":"https://ror.org/01dt7qh15","country_code":"IT","type":"facility","lineage":["https://openalex.org/I2801626668"]}],"countries":["IT"],"is_corresponding":false,"raw_author_name":"Elena Tea Russo","raw_affiliation_strings":["AREA SCIENCE PARK, Trieste, Italy","SISSA, Trieste, Italy"],"raw_orcid":"https://orcid.org/0000-0002-0061-2328","affiliations":[{"raw_affiliation_string":"AREA SCIENCE PARK, Trieste, Italy","institution_ids":["https://openalex.org/I2801626668"]},{"raw_affiliation_string":"SISSA, Trieste, Italy","institution_ids":["https://openalex.org/I138549579"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5054897657","display_name":"Federico Barone","orcid":"https://orcid.org/0000-0001-5696-670X"},"institutions":[{"id":"https://openalex.org/I138549579","display_name":"Scuola Internazionale Superiore di Studi Avanzati","ror":"https://ror.org/004fze387","country_code":"IT","type":"education","lineage":["https://openalex.org/I138549579"]},{"id":"https://openalex.org/I142444530","display_name":"University of Trieste","ror":"https://ror.org/02n742c10","country_code":"IT","type":"education","lineage":["https://openalex.org/I142444530"]},{"id":"https://openalex.org/I2801626668","display_name":"AREA Science Park","ror":"https://ror.org/01dt7qh15","country_code":"IT","type":"facility","lineage":["https://openalex.org/I2801626668"]}],"countries":["IT"],"is_corresponding":false,"raw_author_name":"Federico Barone","raw_affiliation_strings":["AREA SCIENCE PARK, Trieste, Italy","Department of Mathematics and Geosciences, University of Trieste, Trieste, Italy","SISSA, Trieste, Italy"],"raw_orcid":"https://orcid.org/0000-0001-5696-670X","affiliations":[{"raw_affiliation_string":"AREA SCIENCE PARK, Trieste, Italy","institution_ids":["https://openalex.org/I2801626668"]},{"raw_affiliation_string":"Department of Mathematics and Geosciences, University of Trieste, Trieste, Italy","institution_ids":["https://openalex.org/I142444530"]},{"raw_affiliation_string":"SISSA, Trieste, Italy","institution_ids":["https://openalex.org/I138549579"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5059430118","display_name":"Alex Bateman","orcid":null},"institutions":[{"id":"https://openalex.org/I1303153112","display_name":"European Bioinformatics Institute","ror":"https://ror.org/02catss52","country_code":"GB","type":"facility","lineage":["https://openalex.org/I1303153112","https://openalex.org/I4210138560"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Alex Bateman","raw_affiliation_strings":["European Molecular Biology Laboratory, European Bioinformatics Institute (EMBL-EBI), Wellcome Genome Campus, Hinxton, United Kingdom"],"raw_orcid":"https://orcid.org/0000-0002-6982-4660","affiliations":[{"raw_affiliation_string":"European Molecular Biology Laboratory, European Bioinformatics Institute (EMBL-EBI), Wellcome Genome Campus, Hinxton, United Kingdom","institution_ids":["https://openalex.org/I1303153112"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5078180852","display_name":"Stefano Cozzini","orcid":"https://orcid.org/0000-0001-6049-5242"},"institutions":[{"id":"https://openalex.org/I2801626668","display_name":"AREA Science Park","ror":"https://ror.org/01dt7qh15","country_code":"IT","type":"facility","lineage":["https://openalex.org/I2801626668"]}],"countries":["IT"],"is_corresponding":false,"raw_author_name":"Stefano Cozzini","raw_affiliation_strings":["AREA SCIENCE PARK, Trieste, Italy"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"AREA SCIENCE PARK, Trieste, Italy","institution_ids":["https://openalex.org/I2801626668"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5056369160","display_name":"Marco Punta","orcid":"https://orcid.org/0000-0002-0050-0676"},"institutions":[{"id":"https://openalex.org/I154387261","display_name":"Vita-Salute San Raffaele University","ror":"https://ror.org/01gmqr298","country_code":"IT","type":"education","lineage":["https://openalex.org/I154387261"]},{"id":"https://openalex.org/I4210153126","display_name":"Istituti di Ricovero e Cura a Carattere Scientifico","ror":"https://ror.org/04tfzc498","country_code":"IT","type":"healthcare","lineage":["https://openalex.org/I4210153126"]},{"id":"https://openalex.org/I4387155925","display_name":"Istituto di Ricovero e Cura a Carattere Scientifico San Raffaele","ror":"https://ror.org/006x48140","country_code":null,"type":"healthcare","lineage":["https://openalex.org/I4210153126","https://openalex.org/I4387155925"]}],"countries":["IT"],"is_corresponding":true,"raw_author_name":"Marco Punta","raw_affiliation_strings":["Center for Omics Sciences, IRCCS San Raffaele Institute, Milan, Italy","Unit of Immunogenetics, Leukemia Genomics and Immunobiology, Division of Immunology, Transplantation and Infectious Disease, IRCCS San Raffaele Scientific Institute, Milan, Italy"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Center for Omics Sciences, IRCCS San Raffaele Institute, Milan, Italy","institution_ids":["https://openalex.org/I4210153126","https://openalex.org/I4387155925"]},{"raw_affiliation_string":"Unit of Immunogenetics, Leukemia Genomics and Immunobiology, Division of Immunology, Transplantation and Infectious Disease, IRCCS San Raffaele Scientific Institute, Milan, Italy","institution_ids":["https://openalex.org/I154387261","https://openalex.org/I4210153126","https://openalex.org/I4387155925"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5066542398","display_name":"Alessandro Laio","orcid":"https://orcid.org/0000-0001-9164-7907"},"institutions":[{"id":"https://openalex.org/I12323705","display_name":"The Abdus Salam International Centre for Theoretical Physics (ICTP)","ror":"https://ror.org/009gyvm78","country_code":"IT","type":"facility","lineage":["https://openalex.org/I12323705","https://openalex.org/I1286959531","https://openalex.org/I1293226324","https://openalex.org/I2801247003"]},{"id":"https://openalex.org/I138549579","display_name":"Scuola Internazionale Superiore di Studi Avanzati","ror":"https://ror.org/004fze387","country_code":"IT","type":"education","lineage":["https://openalex.org/I138549579"]}],"countries":["IT"],"is_corresponding":true,"raw_author_name":"Alessandro Laio","raw_affiliation_strings":["ICTP, Trieste, Italy","SISSA, Trieste, Italy"],"raw_orcid":"https://orcid.org/0000-0001-9164-7907","affiliations":[{"raw_affiliation_string":"ICTP, Trieste, Italy","institution_ids":["https://openalex.org/I12323705"]},{"raw_affiliation_string":"SISSA, Trieste, Italy","institution_ids":["https://openalex.org/I138549579"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5056369160","https://openalex.org/A5066542398"],"corresponding_institution_ids":["https://openalex.org/I12323705","https://openalex.org/I138549579","https://openalex.org/I154387261","https://openalex.org/I4210153126","https://openalex.org/I4387155925"],"apc_list":{"value":2655,"currency":"USD","value_usd":2655},"apc_paid":{"value":2655,"currency":"USD","value_usd":2655},"fwci":0.6944,"has_fulltext":true,"cited_by_count":8,"citation_normalized_percentile":{"value":0.67398029,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":94,"max":97},"biblio":{"volume":"18","issue":"10","first_page":"e1010610","last_page":"e1010610"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10015","display_name":"Genomics and Phylogenetic Studies","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},"topics":[{"id":"https://openalex.org/T10015","display_name":"Genomics and Phylogenetic Studies","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},{"id":"https://openalex.org/T12254","display_name":"Machine Learning in Bioinformatics","score":0.9995999932289124,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},{"id":"https://openalex.org/T10519","display_name":"Advanced Proteomics Techniques and Applications","score":0.9976999759674072,"subfield":{"id":"https://openalex.org/subfields/1607","display_name":"Spectroscopy"},"field":{"id":"https://openalex.org/fields/16","display_name":"Chemistry"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/cluster-analysis","display_name":"Cluster analysis","score":0.6868864893913269},{"id":"https://openalex.org/keywords/pipeline","display_name":"Pipeline (software)","score":0.6073846220970154},{"id":"https://openalex.org/keywords/protein-family","display_name":"Protein family","score":0.5897553563117981},{"id":"https://openalex.org/keywords/protein-sequencing","display_name":"Protein sequencing","score":0.5864295959472656},{"id":"https://openalex.org/keywords/annotation","display_name":"Annotation","score":0.5666877031326294},{"id":"https://openalex.org/keywords/sequence","display_name":"Sequence (biology)","score":0.5216529965400696},{"id":"https://openalex.org/keywords/protein-domain","display_name":"Protein domain","score":0.5118964910507202},{"id":"https://openalex.org/keywords/computational-biology","display_name":"Computational biology","score":0.5073749423027039},{"id":"https://openalex.org/keywords/sequence-alignment","display_name":"Sequence alignment","score":0.5006163120269775},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.4742487967014313},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.4557693600654602},{"id":"https://openalex.org/keywords/sequence-database","display_name":"Sequence database","score":0.4286516606807709},{"id":"https://openalex.org/keywords/bioinformatics","display_name":"Bioinformatics","score":0.3633880913257599},{"id":"https://openalex.org/keywords/biology","display_name":"Biology","score":0.33263489603996277},{"id":"https://openalex.org/keywords/peptide-sequence","display_name":"Peptide sequence","score":0.29522278904914856},{"id":"https://openalex.org/keywords/genetics","display_name":"Genetics","score":0.231054425239563},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.2130984663963318},{"id":"https://openalex.org/keywords/gene","display_name":"Gene","score":0.12130269408226013}],"concepts":[{"id":"https://openalex.org/C73555534","wikidata":"https://www.wikidata.org/wiki/Q622825","display_name":"Cluster analysis","level":2,"score":0.6868864893913269},{"id":"https://openalex.org/C43521106","wikidata":"https://www.wikidata.org/wiki/Q2165493","display_name":"Pipeline (software)","level":2,"score":0.6073846220970154},{"id":"https://openalex.org/C171897839","wikidata":"https://www.wikidata.org/wiki/Q417841","display_name":"Protein family","level":3,"score":0.5897553563117981},{"id":"https://openalex.org/C10010492","wikidata":"https://www.wikidata.org/wiki/Q3142557","display_name":"Protein sequencing","level":4,"score":0.5864295959472656},{"id":"https://openalex.org/C2776321320","wikidata":"https://www.wikidata.org/wiki/Q857525","display_name":"Annotation","level":2,"score":0.5666877031326294},{"id":"https://openalex.org/C2778112365","wikidata":"https://www.wikidata.org/wiki/Q3511065","display_name":"Sequence (biology)","level":2,"score":0.5216529965400696},{"id":"https://openalex.org/C144292202","wikidata":"https://www.wikidata.org/wiki/Q898273","display_name":"Protein domain","level":3,"score":0.5118964910507202},{"id":"https://openalex.org/C70721500","wikidata":"https://www.wikidata.org/wiki/Q177005","display_name":"Computational biology","level":1,"score":0.5073749423027039},{"id":"https://openalex.org/C45484198","wikidata":"https://www.wikidata.org/wiki/Q827246","display_name":"Sequence alignment","level":4,"score":0.5006163120269775},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.4742487967014313},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.4557693600654602},{"id":"https://openalex.org/C41584329","wikidata":"https://www.wikidata.org/wiki/Q175902","display_name":"Sequence database","level":3,"score":0.4286516606807709},{"id":"https://openalex.org/C60644358","wikidata":"https://www.wikidata.org/wiki/Q128570","display_name":"Bioinformatics","level":1,"score":0.3633880913257599},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.33263489603996277},{"id":"https://openalex.org/C167625842","wikidata":"https://www.wikidata.org/wiki/Q899763","display_name":"Peptide sequence","level":3,"score":0.29522278904914856},{"id":"https://openalex.org/C54355233","wikidata":"https://www.wikidata.org/wiki/Q7162","display_name":"Genetics","level":1,"score":0.231054425239563},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.2130984663963318},{"id":"https://openalex.org/C104317684","wikidata":"https://www.wikidata.org/wiki/Q7187","display_name":"Gene","level":2,"score":0.12130269408226013},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0}],"mesh":[{"descriptor_ui":"D000072417","descriptor_name":"Protein Domains","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D000072417","descriptor_name":"Protein Domains","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D000072417","descriptor_name":"Protein Domains","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D000072417","descriptor_name":"Protein Domains","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D000595","descriptor_name":"Amino Acid Sequence","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D000595","descriptor_name":"Amino Acid Sequence","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D000595","descriptor_name":"Amino Acid Sequence","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D000595","descriptor_name":"Amino Acid Sequence","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D011506","descriptor_name":"Proteins","qualifier_ui":"Q000235","qualifier_name":"genetics","is_major_topic":true},{"descriptor_ui":"D011506","descriptor_name":"Proteins","qualifier_ui":"Q000235","qualifier_name":"genetics","is_major_topic":true},{"descriptor_ui":"D011506","descriptor_name":"Proteins","qualifier_ui":"Q000235","qualifier_name":"genetics","is_major_topic":true},{"descriptor_ui":"D011506","descriptor_name":"Proteins","qualifier_ui":"Q000235","qualifier_name":"genetics","is_major_topic":true},{"descriptor_ui":"D016000","descriptor_name":"Cluster Analysis","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D016000","descriptor_name":"Cluster Analysis","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D016000","descriptor_name":"Cluster Analysis","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D016000","descriptor_name":"Cluster Analysis","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D030562","descriptor_name":"Databases, Protein","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D030562","descriptor_name":"Databases, Protein","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D030562","descriptor_name":"Databases, Protein","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D030562","descriptor_name":"Databases, Protein","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false}],"locations_count":4,"locations":[{"id":"doi:10.1371/journal.pcbi.1010610","is_oa":true,"landing_page_url":"https://doi.org/10.1371/journal.pcbi.1010610","pdf_url":"https://journals.plos.org/ploscompbiol/article/file?id=10.1371/journal.pcbi.1010610&type=printable","source":{"id":"https://openalex.org/S86033158","display_name":"PLoS Computational Biology","issn_l":"1553-734X","issn":["1553-734X","1553-7358"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310315706","host_organization_name":"Public Library of Science","host_organization_lineage":["https://openalex.org/P4310315706"],"host_organization_lineage_names":["Public Library of Science"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"PLOS Computational Biology","raw_type":"journal-article"},{"id":"pmid:36260616","is_oa":false,"landing_page_url":"https://pubmed.ncbi.nlm.nih.gov/36260616","pdf_url":null,"source":{"id":"https://openalex.org/S4306525036","display_name":"PubMed","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1299303238","host_organization_name":"National Institutes of Health","host_organization_lineage":["https://openalex.org/I1299303238"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"PLoS computational biology","raw_type":null},{"id":"pmh:oai:doaj.org/article:14dfd283ab414e45854d9b5c99ebf75a","is_oa":true,"landing_page_url":"https://doaj.org/article/14dfd283ab414e45854d9b5c99ebf75a","pdf_url":null,"source":{"id":"https://openalex.org/S112646816","display_name":"SHILAP Revista de lepidopterolog\u00eda","issn_l":"0300-5267","issn":["0300-5267","2340-4078"],"is_oa":true,"is_in_doaj":true,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"PLoS Computational Biology, Vol 18, Iss 10, p e1010610 (2022)","raw_type":"article"},{"id":"pmh:oai:pubmedcentral.nih.gov:9621593","is_oa":true,"landing_page_url":"https://www.ncbi.nlm.nih.gov/pmc/articles/9621593","pdf_url":null,"source":{"id":"https://openalex.org/S2764455111","display_name":"PubMed Central","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1299303238","host_organization_name":"National Institutes of Health","host_organization_lineage":["https://openalex.org/I1299303238"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"PLoS Comput Biol","raw_type":"Text"}],"best_oa_location":{"id":"doi:10.1371/journal.pcbi.1010610","is_oa":true,"landing_page_url":"https://doi.org/10.1371/journal.pcbi.1010610","pdf_url":"https://journals.plos.org/ploscompbiol/article/file?id=10.1371/journal.pcbi.1010610&type=printable","source":{"id":"https://openalex.org/S86033158","display_name":"PLoS Computational Biology","issn_l":"1553-734X","issn":["1553-734X","1553-7358"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310315706","host_organization_name":"Public Library of Science","host_organization_lineage":["https://openalex.org/P4310315706"],"host_organization_lineage_names":["Public Library of Science"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"PLOS Computational Biology","raw_type":"journal-article"},"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/17","score":0.46000000834465027,"display_name":"Partnerships for the goals"}],"awards":[],"funders":[],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4306831161.pdf","grobid_xml":"https://content.openalex.org/works/W4306831161.grobid-xml"},"referenced_works_count":43,"referenced_works":["https://openalex.org/W31923072","https://openalex.org/W1971147414","https://openalex.org/W1999402559","https://openalex.org/W2006192061","https://openalex.org/W2007745123","https://openalex.org/W2031903428","https://openalex.org/W2055043387","https://openalex.org/W2071381739","https://openalex.org/W2102652793","https://openalex.org/W2106383137","https://openalex.org/W2112493377","https://openalex.org/W2116441099","https://openalex.org/W2124166542","https://openalex.org/W2132926880","https://openalex.org/W2133790733","https://openalex.org/W2141925828","https://openalex.org/W2142678478","https://openalex.org/W2147199176","https://openalex.org/W2153971450","https://openalex.org/W2156125289","https://openalex.org/W2157190239","https://openalex.org/W2158906453","https://openalex.org/W2161880345","https://openalex.org/W2162758337","https://openalex.org/W2165835468","https://openalex.org/W2412976325","https://openalex.org/W2761688050","https://openalex.org/W2806884808","https://openalex.org/W2889944714","https://openalex.org/W2900359059","https://openalex.org/W2907872701","https://openalex.org/W2912250830","https://openalex.org/W2950954328","https://openalex.org/W2984894304","https://openalex.org/W2991012415","https://openalex.org/W3095583226","https://openalex.org/W3112376646","https://openalex.org/W3134393247","https://openalex.org/W3143063265","https://openalex.org/W3177828909","https://openalex.org/W3211795435","https://openalex.org/W4375858802","https://openalex.org/W6769989246"],"related_works":["https://openalex.org/W2075451741","https://openalex.org/W2036521446","https://openalex.org/W2926533301","https://openalex.org/W2103210188","https://openalex.org/W2135912076","https://openalex.org/W2053803793","https://openalex.org/W2121560377","https://openalex.org/W4306831161","https://openalex.org/W2152376949","https://openalex.org/W2137488129"],"abstract_inverted_index":{"Proteins":[0],"that":[1],"are":[2,182,192],"known":[3],"only":[4],"at":[5,41],"a":[6,55,77,87,91,200],"sequence":[7,61,79],"level":[8],"outnumber":[9],"those":[10],"with":[11,177],"an":[12],"experimental":[13],"characterization":[14],"by":[15,69,162],"orders":[16],"of":[17,45,58,73,90,102,107,110,137,147,153,174],"magnitude.":[18],"Classifying":[19],"protein":[20,60,78,123,175,188],"regions":[21,176],"(domains)":[22],"into":[23],"homologous":[24],"families":[25,150,155],"can":[26,156],"generate":[27],"testable":[28],"functional":[29],"hypotheses":[30],"for":[31,185],"yet":[32],"unannotated":[33],"sequences.":[34,84],"Existing":[35],"domain":[36],"family":[37],"resources":[38],"typically":[39],"use":[40],"least":[42],"some":[43],"degree":[44],"manual":[46],"curation:":[47],"they":[48],"grow":[49],"slowly":[50],"over":[51],"time":[52],"and":[53,104,140,151],"leave":[54],"large":[56],"fraction":[57],"the":[59,108,135,138,196],"space":[62],"unclassified.":[63],"We":[64,85],"here":[65],"describe":[66],"automatic":[67,128],"clustering":[68],"Density":[70],"Peak":[71],"Clustering":[72],"UniRef50":[74],"v.":[75],"2017_07,":[76],"database":[80],"including":[81],"approximately":[82],"23M":[83],"performed":[86],"radical":[88],"re-implementation":[89],"pipeline":[92],"we":[93,117],"previously":[94],"developed":[95],"in":[96,125,131,143],"order":[97,109],"to":[98,134,159,195],"allow":[99],"handling":[100],"millions":[101],"sequences":[103],"data":[105],"volumes":[106],"3":[111],"TeraBytes.":[112],"The":[113],"modified":[114],"pipeline,":[115],"which":[116,181],"call":[118],"DPCfam,":[119],"finds":[120,168],"\u223c":[121],"45,000":[122],"clusters":[124,160,172],"UniRef50.":[126],"Our":[127],"classification":[129],"is":[130],"close":[132],"correspondence":[133],"ones":[136],"Pfam":[139,149,179],"ECOD":[141,154],"resources:":[142],"particular,":[144],"about":[145],"81%":[146],"medium-large":[148],"72%":[152],"be":[157],"mapped":[158],"generated":[161],"DPCfam.":[163],"In":[164],"addition,":[165],"our":[166],"protocol":[167],"more":[169],"than":[170],"14,000":[171],"constituted":[173],"no":[178],"annotation,":[180],"therefore":[183],"candidates":[184],"representing":[186],"novel":[187],"families.":[189],"These":[190],"results":[191],"made":[193],"available":[194],"scientific":[197],"community":[198],"through":[199],"dedicated":[201],"repository.":[202]},"counts_by_year":[{"year":2025,"cited_by_count":3},{"year":2024,"cited_by_count":3},{"year":2023,"cited_by_count":2}],"updated_date":"2026-03-27T05:58:40.876381","created_date":"2025-10-10T00:00:00"}
