{"id":"https://openalex.org/W2950560741","doi":"https://doi.org/10.1371/journal.pcbi.1007282","title":"Size and structure of the sequence space of repeat proteins","display_name":"Size and structure of the sequence space of repeat proteins","publication_year":2019,"publication_date":"2019-08-15","ids":{"openalex":"https://openalex.org/W2950560741","doi":"https://doi.org/10.1371/journal.pcbi.1007282","mag":"2950560741","pmid":"https://pubmed.ncbi.nlm.nih.gov/31415557"},"language":"en","primary_location":{"id":"doi:10.1371/journal.pcbi.1007282","is_oa":true,"landing_page_url":"https://doi.org/10.1371/journal.pcbi.1007282","pdf_url":"https://journals.plos.org/ploscompbiol/article/file?id=10.1371/journal.pcbi.1007282&type=printable","source":{"id":"https://openalex.org/S86033158","display_name":"PLoS Computational Biology","issn_l":"1553-734X","issn":["1553-734X","1553-7358"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310315706","host_organization_name":"Public Library of Science","host_organization_lineage":["https://openalex.org/P4310315706"],"host_organization_lineage_names":["Public Library of Science"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"PLOS Computational Biology","raw_type":"journal-article"},"type":"article","indexed_in":["arxiv","crossref","doaj","pubmed"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://journals.plos.org/ploscompbiol/article/file?id=10.1371/journal.pcbi.1007282&type=printable","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5023018956","display_name":"Jacopo Marchi","orcid":"https://orcid.org/0000-0003-1192-1203"},"institutions":[{"id":"https://openalex.org/I1294671590","display_name":"Centre National de la Recherche Scientifique","ror":"https://ror.org/02feahw73","country_code":"FR","type":"government","lineage":["https://openalex.org/I1294671590"]},{"id":"https://openalex.org/I204730241","display_name":"Universit\u00e9 Paris Cit\u00e9","ror":"https://ror.org/05f82e368","country_code":"FR","type":"education","lineage":["https://openalex.org/I204730241"]},{"id":"https://openalex.org/I2746051580","display_name":"Universit\u00e9 Paris Sciences et Lettres","ror":"https://ror.org/013cjyk83","country_code":"FR","type":"education","lineage":["https://openalex.org/I2746051580"]},{"id":"https://openalex.org/I39804081","display_name":"Sorbonne Universit\u00e9","ror":"https://ror.org/02en5vm52","country_code":"FR","type":"education","lineage":["https://openalex.org/I39804081"]}],"countries":["FR"],"is_corresponding":false,"raw_author_name":"Jacopo Marchi","raw_affiliation_strings":["Laboratoire de physique de l'\u00c9cole normale sup\u00e9rieure (PSL University), CNRS, Sorbonne Universit\u00e9, and Universit\u00e9 de Paris, 75005 Paris, France"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Laboratoire de physique de l'\u00c9cole normale sup\u00e9rieure (PSL University), CNRS, Sorbonne Universit\u00e9, and Universit\u00e9 de Paris, 75005 Paris, France","institution_ids":["https://openalex.org/I204730241","https://openalex.org/I39804081","https://openalex.org/I2746051580","https://openalex.org/I1294671590"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5035085585","display_name":"Ezequiel A. Galpern","orcid":null},"institutions":[{"id":"https://openalex.org/I151201029","display_name":"Consejo Nacional de Investigaciones Cient\u00edficas y T\u00e9cnicas","ror":"https://ror.org/03cqe8w59","country_code":"AR","type":"government","lineage":["https://openalex.org/I151201029","https://openalex.org/I4210123736","https://openalex.org/I4387155568"]},{"id":"https://openalex.org/I24354313","display_name":"Universidad de Buenos Aires","ror":"https://ror.org/0081fs513","country_code":"AR","type":"education","lineage":["https://openalex.org/I24354313"]},{"id":"https://openalex.org/I53241121","display_name":"Fundaci\u00f3n Ciencias Exactas y Naturales","ror":"https://ror.org/05rxmkq09","country_code":"AR","type":"nonprofit","lineage":["https://openalex.org/I53241121"]}],"countries":["AR"],"is_corresponding":false,"raw_author_name":"Ezequiel A. Galpern","raw_affiliation_strings":["CONICET - Universidad de Buenos Aires, Instituto de Qu\u00edmica Biol\u00f3gica de la Facultad de Ciencias Exactas y Naturales (IQUIBICEN), Buenos Aires, Argentina","Protein Physiology Lab, Universidad de Buenos Aires, Facultad de Ciencias Exactas y Naturales, Departamento de Qu\u00edmica Biol\u00f3gica, Buenos Aires, Argentina"],"raw_orcid":"https://orcid.org/0000-0001-9516-3985","affiliations":[{"raw_affiliation_string":"CONICET - Universidad de Buenos Aires, Instituto de Qu\u00edmica Biol\u00f3gica de la Facultad de Ciencias Exactas y Naturales (IQUIBICEN), Buenos Aires, Argentina","institution_ids":["https://openalex.org/I53241121","https://openalex.org/I151201029","https://openalex.org/I24354313"]},{"raw_affiliation_string":"Protein Physiology Lab, Universidad de Buenos Aires, Facultad de Ciencias Exactas y Naturales, Departamento de Qu\u00edmica Biol\u00f3gica, Buenos Aires, Argentina","institution_ids":["https://openalex.org/I53241121","https://openalex.org/I24354313"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5054139274","display_name":"Roc\u00edo Espada","orcid":"https://orcid.org/0000-0003-3829-473X"},"institutions":[{"id":"https://openalex.org/I1294671590","display_name":"Centre National de la Recherche Scientifique","ror":"https://ror.org/02feahw73","country_code":"FR","type":"government","lineage":["https://openalex.org/I1294671590"]},{"id":"https://openalex.org/I2746051580","display_name":"Universit\u00e9 Paris Sciences et Lettres","ror":"https://ror.org/013cjyk83","country_code":"FR","type":"education","lineage":["https://openalex.org/I2746051580"]},{"id":"https://openalex.org/I98910050","display_name":"ESPCI Paris","ror":"https://ror.org/03zx86w41","country_code":"FR","type":"education","lineage":["https://openalex.org/I190752583","https://openalex.org/I2746051580","https://openalex.org/I98910050"]}],"countries":["FR"],"is_corresponding":false,"raw_author_name":"Rocio Espada","raw_affiliation_strings":["Laboratoire Gulliver, Ecole sup\u00e9rieure de physique et chimie industrielles (PSL University) and CNRS, 75005, Paris, France"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Laboratoire Gulliver, Ecole sup\u00e9rieure de physique et chimie industrielles (PSL University) and CNRS, 75005, Paris, France","institution_ids":["https://openalex.org/I98910050","https://openalex.org/I2746051580","https://openalex.org/I1294671590"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5026249041","display_name":"Diego U. Ferreiro","orcid":"https://orcid.org/0000-0002-7869-4247"},"institutions":[{"id":"https://openalex.org/I151201029","display_name":"Consejo Nacional de Investigaciones Cient\u00edficas y T\u00e9cnicas","ror":"https://ror.org/03cqe8w59","country_code":"AR","type":"government","lineage":["https://openalex.org/I151201029","https://openalex.org/I4210123736","https://openalex.org/I4387155568"]},{"id":"https://openalex.org/I24354313","display_name":"Universidad de Buenos Aires","ror":"https://ror.org/0081fs513","country_code":"AR","type":"education","lineage":["https://openalex.org/I24354313"]},{"id":"https://openalex.org/I53241121","display_name":"Fundaci\u00f3n Ciencias Exactas y Naturales","ror":"https://ror.org/05rxmkq09","country_code":"AR","type":"nonprofit","lineage":["https://openalex.org/I53241121"]}],"countries":["AR"],"is_corresponding":false,"raw_author_name":"Diego U. Ferreiro","raw_affiliation_strings":["CONICET - Universidad de Buenos Aires, Instituto de Qu\u00edmica Biol\u00f3gica de la Facultad de Ciencias Exactas y Naturales (IQUIBICEN), Buenos Aires, Argentina","Protein Physiology Lab, Universidad de Buenos Aires, Facultad de Ciencias Exactas y Naturales, Departamento de Qu\u00edmica Biol\u00f3gica, Buenos Aires, Argentina"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"CONICET - Universidad de Buenos Aires, Instituto de Qu\u00edmica Biol\u00f3gica de la Facultad de Ciencias Exactas y Naturales (IQUIBICEN), Buenos Aires, Argentina","institution_ids":["https://openalex.org/I53241121","https://openalex.org/I151201029","https://openalex.org/I24354313"]},{"raw_affiliation_string":"Protein Physiology Lab, Universidad de Buenos Aires, Facultad de Ciencias Exactas y Naturales, Departamento de Qu\u00edmica Biol\u00f3gica, Buenos Aires, Argentina","institution_ids":["https://openalex.org/I53241121","https://openalex.org/I24354313"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5016850339","display_name":"Aleksandra M. Walczak","orcid":"https://orcid.org/0000-0002-2686-5702"},"institutions":[{"id":"https://openalex.org/I1294671590","display_name":"Centre National de la Recherche Scientifique","ror":"https://ror.org/02feahw73","country_code":"FR","type":"government","lineage":["https://openalex.org/I1294671590"]},{"id":"https://openalex.org/I204730241","display_name":"Universit\u00e9 Paris Cit\u00e9","ror":"https://ror.org/05f82e368","country_code":"FR","type":"education","lineage":["https://openalex.org/I204730241"]},{"id":"https://openalex.org/I2746051580","display_name":"Universit\u00e9 Paris Sciences et Lettres","ror":"https://ror.org/013cjyk83","country_code":"FR","type":"education","lineage":["https://openalex.org/I2746051580"]},{"id":"https://openalex.org/I39804081","display_name":"Sorbonne Universit\u00e9","ror":"https://ror.org/02en5vm52","country_code":"FR","type":"education","lineage":["https://openalex.org/I39804081"]}],"countries":["FR"],"is_corresponding":true,"raw_author_name":"Aleksandra M. Walczak","raw_affiliation_strings":["Laboratoire de physique de l'\u00c9cole normale sup\u00e9rieure (PSL University), CNRS, Sorbonne Universit\u00e9, and Universit\u00e9 de Paris, 75005 Paris, France"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Laboratoire de physique de l'\u00c9cole normale sup\u00e9rieure (PSL University), CNRS, Sorbonne Universit\u00e9, and Universit\u00e9 de Paris, 75005 Paris, France","institution_ids":["https://openalex.org/I204730241","https://openalex.org/I39804081","https://openalex.org/I2746051580","https://openalex.org/I1294671590"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5064369206","display_name":"Thierry Mora","orcid":"https://orcid.org/0000-0002-5456-9361"},"institutions":[{"id":"https://openalex.org/I1294671590","display_name":"Centre National de la Recherche Scientifique","ror":"https://ror.org/02feahw73","country_code":"FR","type":"government","lineage":["https://openalex.org/I1294671590"]},{"id":"https://openalex.org/I204730241","display_name":"Universit\u00e9 Paris Cit\u00e9","ror":"https://ror.org/05f82e368","country_code":"FR","type":"education","lineage":["https://openalex.org/I204730241"]},{"id":"https://openalex.org/I2746051580","display_name":"Universit\u00e9 Paris Sciences et Lettres","ror":"https://ror.org/013cjyk83","country_code":"FR","type":"education","lineage":["https://openalex.org/I2746051580"]},{"id":"https://openalex.org/I39804081","display_name":"Sorbonne Universit\u00e9","ror":"https://ror.org/02en5vm52","country_code":"FR","type":"education","lineage":["https://openalex.org/I39804081"]}],"countries":["FR"],"is_corresponding":true,"raw_author_name":"Thierry Mora","raw_affiliation_strings":["Laboratoire de physique de l'\u00c9cole normale sup\u00e9rieure (PSL University), CNRS, Sorbonne Universit\u00e9, and Universit\u00e9 de Paris, 75005 Paris, France"],"raw_orcid":"https://orcid.org/0000-0002-5456-9361","affiliations":[{"raw_affiliation_string":"Laboratoire de physique de l'\u00c9cole normale sup\u00e9rieure (PSL University), CNRS, Sorbonne Universit\u00e9, and Universit\u00e9 de Paris, 75005 Paris, France","institution_ids":["https://openalex.org/I204730241","https://openalex.org/I39804081","https://openalex.org/I2746051580","https://openalex.org/I1294671590"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5016850339","https://openalex.org/A5064369206"],"corresponding_institution_ids":["https://openalex.org/I1294671590","https://openalex.org/I204730241","https://openalex.org/I2746051580","https://openalex.org/I39804081"],"apc_list":{"value":2655,"currency":"USD","value_usd":2655},"apc_paid":{"value":2655,"currency":"USD","value_usd":2655},"fwci":2.3398,"has_fulltext":true,"cited_by_count":23,"citation_normalized_percentile":{"value":0.88650753,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":99},"biblio":{"volume":"15","issue":"8","first_page":"e1007282","last_page":"e1007282"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11764","display_name":"Evolution and Genetic Dynamics","score":0.9995999932289124,"subfield":{"id":"https://openalex.org/subfields/1311","display_name":"Genetics"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},"topics":[{"id":"https://openalex.org/T11764","display_name":"Evolution and Genetic Dynamics","score":0.9995999932289124,"subfield":{"id":"https://openalex.org/subfields/1311","display_name":"Genetics"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},{"id":"https://openalex.org/T10044","display_name":"Protein Structure and Dynamics","score":0.9994000196456909,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},{"id":"https://openalex.org/T10521","display_name":"RNA and protein synthesis mechanisms","score":0.9983999729156494,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/sequence-space","display_name":"Sequence space","score":0.5882944464683533},{"id":"https://openalex.org/keywords/coding-region","display_name":"Coding region","score":0.5615510940551758},{"id":"https://openalex.org/keywords/conserved-sequence","display_name":"Conserved sequence","score":0.5188392400741577},{"id":"https://openalex.org/keywords/biology","display_name":"Biology","score":0.5006856918334961},{"id":"https://openalex.org/keywords/sequence-logo","display_name":"Sequence logo","score":0.4995849132537842},{"id":"https://openalex.org/keywords/protein-family","display_name":"Protein family","score":0.48849165439605713},{"id":"https://openalex.org/keywords/amino-acid","display_name":"Amino acid","score":0.4803772568702698},{"id":"https://openalex.org/keywords/maxima-and-minima","display_name":"Maxima and minima","score":0.4750770926475525},{"id":"https://openalex.org/keywords/sequence-alignment","display_name":"Sequence alignment","score":0.45258885622024536},{"id":"https://openalex.org/keywords/computational-biology","display_name":"Computational biology","score":0.44385913014411926},{"id":"https://openalex.org/keywords/space","display_name":"Space (punctuation)","score":0.43523353338241577},{"id":"https://openalex.org/keywords/evolutionary-biology","display_name":"Evolutionary biology","score":0.4301188290119171},{"id":"https://openalex.org/keywords/sequence","display_name":"Sequence (biology)","score":0.4235721230506897},{"id":"https://openalex.org/keywords/protein-structure","display_name":"Protein structure","score":0.42019039392471313},{"id":"https://openalex.org/keywords/genetics","display_name":"Genetics","score":0.41988861560821533},{"id":"https://openalex.org/keywords/peptide-sequence","display_name":"Peptide sequence","score":0.39462852478027344},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.3672124445438385},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.2609897553920746},{"id":"https://openalex.org/keywords/gene","display_name":"Gene","score":0.17165392637252808},{"id":"https://openalex.org/keywords/discrete-mathematics","display_name":"Discrete mathematics","score":0.127920001745224}],"concepts":[{"id":"https://openalex.org/C30711495","wikidata":"https://www.wikidata.org/wiki/Q289411","display_name":"Sequence space","level":3,"score":0.5882944464683533},{"id":"https://openalex.org/C91779695","wikidata":"https://www.wikidata.org/wiki/Q3780824","display_name":"Coding region","level":3,"score":0.5615510940551758},{"id":"https://openalex.org/C199216141","wikidata":"https://www.wikidata.org/wiki/Q4995178","display_name":"Conserved sequence","level":4,"score":0.5188392400741577},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.5006856918334961},{"id":"https://openalex.org/C105082737","wikidata":"https://www.wikidata.org/wiki/Q7452470","display_name":"Sequence logo","level":5,"score":0.4995849132537842},{"id":"https://openalex.org/C171897839","wikidata":"https://www.wikidata.org/wiki/Q417841","display_name":"Protein family","level":3,"score":0.48849165439605713},{"id":"https://openalex.org/C515207424","wikidata":"https://www.wikidata.org/wiki/Q8066","display_name":"Amino acid","level":2,"score":0.4803772568702698},{"id":"https://openalex.org/C186633575","wikidata":"https://www.wikidata.org/wiki/Q845060","display_name":"Maxima and minima","level":2,"score":0.4750770926475525},{"id":"https://openalex.org/C45484198","wikidata":"https://www.wikidata.org/wiki/Q827246","display_name":"Sequence alignment","level":4,"score":0.45258885622024536},{"id":"https://openalex.org/C70721500","wikidata":"https://www.wikidata.org/wiki/Q177005","display_name":"Computational biology","level":1,"score":0.44385913014411926},{"id":"https://openalex.org/C2778572836","wikidata":"https://www.wikidata.org/wiki/Q380933","display_name":"Space (punctuation)","level":2,"score":0.43523353338241577},{"id":"https://openalex.org/C78458016","wikidata":"https://www.wikidata.org/wiki/Q840400","display_name":"Evolutionary biology","level":1,"score":0.4301188290119171},{"id":"https://openalex.org/C2778112365","wikidata":"https://www.wikidata.org/wiki/Q3511065","display_name":"Sequence (biology)","level":2,"score":0.4235721230506897},{"id":"https://openalex.org/C47701112","wikidata":"https://www.wikidata.org/wiki/Q735188","display_name":"Protein structure","level":2,"score":0.42019039392471313},{"id":"https://openalex.org/C54355233","wikidata":"https://www.wikidata.org/wiki/Q7162","display_name":"Genetics","level":1,"score":0.41988861560821533},{"id":"https://openalex.org/C167625842","wikidata":"https://www.wikidata.org/wiki/Q899763","display_name":"Peptide sequence","level":3,"score":0.39462852478027344},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.3672124445438385},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.2609897553920746},{"id":"https://openalex.org/C104317684","wikidata":"https://www.wikidata.org/wiki/Q7187","display_name":"Gene","level":2,"score":0.17165392637252808},{"id":"https://openalex.org/C118615104","wikidata":"https://www.wikidata.org/wiki/Q121416","display_name":"Discrete mathematics","level":1,"score":0.127920001745224},{"id":"https://openalex.org/C55493867","wikidata":"https://www.wikidata.org/wiki/Q7094","display_name":"Biochemistry","level":1,"score":0.0},{"id":"https://openalex.org/C134306372","wikidata":"https://www.wikidata.org/wiki/Q7754","display_name":"Mathematical analysis","level":1,"score":0.0},{"id":"https://openalex.org/C132954091","wikidata":"https://www.wikidata.org/wiki/Q194397","display_name":"Banach space","level":2,"score":0.0},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0}],"mesh":[{"descriptor_ui":"D000465","descriptor_name":"Algorithms","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D000465","descriptor_name":"Algorithms","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D000465","descriptor_name":"Algorithms","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D000465","descriptor_name":"Algorithms","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D000595","descriptor_name":"Amino Acid Sequence","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D000595","descriptor_name":"Amino Acid Sequence","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D000595","descriptor_name":"Amino Acid Sequence","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D000595","descriptor_name":"Amino Acid Sequence","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D008958","descriptor_name":"Models, Molecular","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D008958","descriptor_name":"Models, Molecular","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D008958","descriptor_name":"Models, Molecular","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D008958","descriptor_name":"Models, Molecular","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D011487","descriptor_name":"Protein Conformation","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D011487","descriptor_name":"Protein Conformation","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D011487","descriptor_name":"Protein Conformation","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D011487","descriptor_name":"Protein Conformation","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D011506","descriptor_name":"Proteins","qualifier_ui":"Q000235","qualifier_name":"genetics","is_major_topic":false},{"descriptor_ui":"D011506","descriptor_name":"Proteins","qualifier_ui":"Q000235","qualifier_name":"genetics","is_major_topic":false},{"descriptor_ui":"D011506","descriptor_name":"Proteins","qualifier_ui":"Q000235","qualifier_name":"genetics","is_major_topic":false},{"descriptor_ui":"D011506","descriptor_name":"Proteins","qualifier_ui":"Q000235","qualifier_name":"genetics","is_major_topic":false},{"descriptor_ui":"D011506","descriptor_name":"Proteins","qualifier_ui":"Q000737","qualifier_name":"chemistry","is_major_topic":false},{"descriptor_ui":"D011506","descriptor_name":"Proteins","qualifier_ui":"Q000737","qualifier_name":"chemistry","is_major_topic":false},{"descriptor_ui":"D011506","descriptor_name":"Proteins","qualifier_ui":"Q000737","qualifier_name":"chemistry","is_major_topic":false},{"descriptor_ui":"D011506","descriptor_name":"Proteins","qualifier_ui":"Q000737","qualifier_name":"chemistry","is_major_topic":false},{"descriptor_ui":"D013816","descriptor_name":"Thermodynamics","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D013816","descriptor_name":"Thermodynamics","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D013816","descriptor_name":"Thermodynamics","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D013816","descriptor_name":"Thermodynamics","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D016415","descriptor_name":"Sequence Alignment","qualifier_ui":"Q000706","qualifier_name":"statistics & numerical data","is_major_topic":false},{"descriptor_ui":"D016415","descriptor_name":"Sequence Alignment","qualifier_ui":"Q000706","qualifier_name":"statistics & numerical data","is_major_topic":false},{"descriptor_ui":"D016415","descriptor_name":"Sequence Alignment","qualifier_ui":"Q000706","qualifier_name":"statistics & numerical data","is_major_topic":false},{"descriptor_ui":"D016415","descriptor_name":"Sequence Alignment","qualifier_ui":"Q000706","qualifier_name":"statistics & numerical data","is_major_topic":false},{"descriptor_ui":"D017124","descriptor_name":"Conserved Sequence","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D017124","descriptor_name":"Conserved Sequence","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D017124","descriptor_name":"Conserved Sequence","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D017124","descriptor_name":"Conserved Sequence","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D017386","descriptor_name":"Sequence Homology, Amino Acid","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D017386","descriptor_name":"Sequence Homology, Amino Acid","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D017386","descriptor_name":"Sequence Homology, Amino Acid","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D017386","descriptor_name":"Sequence Homology, Amino Acid","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D017510","descriptor_name":"Protein Folding","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D017510","descriptor_name":"Protein Folding","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D017510","descriptor_name":"Protein Folding","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D017510","descriptor_name":"Protein Folding","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D019143","descriptor_name":"Evolution, Molecular","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D019143","descriptor_name":"Evolution, Molecular","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D019143","descriptor_name":"Evolution, Molecular","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D019143","descriptor_name":"Evolution, Molecular","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D019277","descriptor_name":"Entropy","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D019277","descriptor_name":"Entropy","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D019277","descriptor_name":"Entropy","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D019277","descriptor_name":"Entropy","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D019295","descriptor_name":"Computational Biology","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D019295","descriptor_name":"Computational Biology","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D019295","descriptor_name":"Computational Biology","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D019295","descriptor_name":"Computational Biology","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D020449","descriptor_name":"Repetitive Sequences, Amino Acid","qualifier_ui":"Q000235","qualifier_name":"genetics","is_major_topic":false},{"descriptor_ui":"D020449","descriptor_name":"Repetitive Sequences, Amino Acid","qualifier_ui":"Q000235","qualifier_name":"genetics","is_major_topic":false},{"descriptor_ui":"D020449","descriptor_name":"Repetitive Sequences, Amino Acid","qualifier_ui":"Q000235","qualifier_name":"genetics","is_major_topic":false},{"descriptor_ui":"D020449","descriptor_name":"Repetitive Sequences, Amino Acid","qualifier_ui":"Q000235","qualifier_name":"genetics","is_major_topic":false}],"locations_count":7,"locations":[{"id":"doi:10.1371/journal.pcbi.1007282","is_oa":true,"landing_page_url":"https://doi.org/10.1371/journal.pcbi.1007282","pdf_url":"https://journals.plos.org/ploscompbiol/article/file?id=10.1371/journal.pcbi.1007282&type=printable","source":{"id":"https://openalex.org/S86033158","display_name":"PLoS Computational Biology","issn_l":"1553-734X","issn":["1553-734X","1553-7358"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310315706","host_organization_name":"Public Library of Science","host_organization_lineage":["https://openalex.org/P4310315706"],"host_organization_lineage_names":["Public Library of Science"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"PLOS Computational Biology","raw_type":"journal-article"},{"id":"pmid:31415557","is_oa":false,"landing_page_url":"https://pubmed.ncbi.nlm.nih.gov/31415557","pdf_url":null,"source":{"id":"https://openalex.org/S4306525036","display_name":"PubMed","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1299303238","host_organization_name":"National Institutes of Health","host_organization_lineage":["https://openalex.org/I1299303238"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"PLoS computational biology","raw_type":null},{"id":"pmh:oai:arXiv.org:1905.04493","is_oa":true,"landing_page_url":"http://arxiv.org/abs/1905.04493","pdf_url":"https://arxiv.org/pdf/1905.04493","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},{"id":"pmh:oai:HAL:hal-02345012v1","is_oa":false,"landing_page_url":"https://hal.sorbonne-universite.fr/hal-02345012","pdf_url":null,"source":{"id":"https://openalex.org/S4306402512","display_name":"HAL (Le Centre pour la Communication Scientifique Directe)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1294671590","host_organization_name":"Centre National de la Recherche Scientifique","host_organization_lineage":["https://openalex.org/I1294671590"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"PLoS Computational Biology, 2019, 15 (8), pp.e1007282. &#x27E8;10.1371/journal.pcbi.1007282&#x27E9;","raw_type":"Journal articles"},{"id":"pmh:oai:doaj.org/article:edba023b564140f38dd483dd60e181d9","is_oa":true,"landing_page_url":"https://doaj.org/article/edba023b564140f38dd483dd60e181d9","pdf_url":null,"source":{"id":"https://openalex.org/S4306401280","display_name":"DOAJ (DOAJ: Directory of Open Access Journals)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by-sa","license_id":"https://openalex.org/licenses/cc-by-sa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"PLoS Computational Biology, Vol 15, Iss 8, p e1007282 (2019)","raw_type":"article"},{"id":"pmh:oai:europepmc.org:5712091","is_oa":true,"landing_page_url":"https://www.ncbi.nlm.nih.gov/pmc/articles/6733475","pdf_url":null,"source":{"id":"https://openalex.org/S4306400806","display_name":"Europe PMC (PubMed Central)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1303153112","host_organization_name":"European Bioinformatics Institute","host_organization_lineage":["https://openalex.org/I1303153112"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Text"},{"id":"pmh:oai:figshare.com:article/9636530","is_oa":true,"landing_page_url":"https://figshare.com/articles/dataset/Size_and_structure_of_the_sequence_space_of_repeat_proteins/9636530","pdf_url":null,"source":{"id":"https://openalex.org/S4377196282","display_name":"Figshare","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I4210132348","host_organization_name":"Figshare (United Kingdom)","host_organization_lineage":["https://openalex.org/I4210132348"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"Dataset"}],"best_oa_location":{"id":"doi:10.1371/journal.pcbi.1007282","is_oa":true,"landing_page_url":"https://doi.org/10.1371/journal.pcbi.1007282","pdf_url":"https://journals.plos.org/ploscompbiol/article/file?id=10.1371/journal.pcbi.1007282&type=printable","source":{"id":"https://openalex.org/S86033158","display_name":"PLoS Computational Biology","issn_l":"1553-734X","issn":["1553-734X","1553-7358"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310315706","host_organization_name":"Public Library of Science","host_organization_lineage":["https://openalex.org/P4310315706"],"host_organization_lineage_names":["Public Library of Science"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"PLOS Computational Biology","raw_type":"journal-article"},"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G301926269","display_name":null,"funder_award_id":"80NSSC18M0093","funder_id":"https://openalex.org/F4320331934","funder_display_name":"NASA Astrobiology Institute"},{"id":"https://openalex.org/G871666106","display_name":"LIFE IS ELECTRIC. THE ELECTRONIC CIRCUITRY IS CATALYZED BY A SMALL SUBSET OF PROTEINS THAT FUNCTION AS SOPHISTICATED NANOMACHINES. CURRENTLY  VERY LITTLE IS KNOWN ABOUT THE ORIGIN OF PROTEINS ON EARTH AND THEIR EVOLUTION IN EARLY MICROBIAL LIFE. TO FILL THIS KNOWLEDGE GAP  THE ENIGMA RESEARCH PROGRAM WILL CARRY OUT EXPERIMENTAL  BIOINFORMATIC  DATA-DRIVEN ABDUCTIVE STUDIES TO EXPLORE THE ORIGIN OF CATALYSIS  THE EVOLUTION OF PROTEIN STRUCTURES IN MICROBIAL ANCESTORS  AND THE CO-EVOLUTION OF PROTEINS AND THE GEOSPHERE THROUGH GEOLOGIC TIME. ENIGMA HAS THREE INTEGRATED RESEARCH THEMES FOCUSED ON UNDERSTANDING THE EVOLUTION OF PROTEINS INVOLVED IN ELECTRON TRANSFER AND ENERGY GENERATION. THEME 1 WILL INVESTIGATE THE SYNTHESIS AND FUNCTION OF METALLOPROTEINS IN PREBIOTIC SYSTEMS. EXTANT MICROBIAL PROTEINS WILL BE ANALYZED TO IDENTIFY THE SIMPLEST STRUCTURES THAT CAN PERFORM ELECTRON TRANSFER REACTIONS. USING DE NOVO COMPUTATIONAL PROTEIN DESIGN  SMALL PEPTIDES WILL BE SYNTHESIZED AND CHARACTERIZED TO ELUCIDATE THE EARLIEST BIOCHEMICAL REACTIONS IN THE ORIGIN OF LIFE. THEME 2 WILL EXAMINE THE EMERGENT COMPLEXITY OF METALLOPROTEINS IN MICROBIAL ANCESTORS. NEW COMPUTATIONAL METHODS LINKING PROTEIN STRUCTURE AND PHYLOGENY WILL BE DEVELOPED TO STUDY THE EVOLUTION OF METAL BINDING LIGANDS IN PROTEINS. A STRUCTURAL ALIGNMENT-BASED HOMOLOGY APPROACH WILL BE USED TO IDENTIFY THE OLDEST STRUCTURES IN PROTEINS AND TO TRACE THE EARLY EVOLUTION OF METAL BINDING FOLDS. THEME 3 WILL EXPLORE THE CO-EVOLUTION OF PROTEINS AND THE GEOSPHERE. DEEP-TIME DATA ON THE NATURE AND DISTRIBUTION OF REDOX-SENSITIVE ELEMENTS IN ROCKS AND MINERALS WILL BE INTEGRATED WITH PROTEIN STRUCTURAL DATA IN THE PROTEIN DATA BANK. MINERALOGICAL DATA (E.G. MINERAL COMPOSITION  SOLUBILITY  AND AGE) WILL BE INCORPORATED INTO GEOCHEMICAL MODELS TO CONSTRAIN THE BIOAVAILABILITY OF METALS OVER GEOLOGIC TIME AND METAL BIOAVAILABILITY DURING ARCHEAN AND PROTEROZOIC ERAS WILL USED TO DATE SPECIFIC POSITIONS IN THE EVOLUTIONARY TREE CONSTRUCTED IN THEME 2. THE THREE ENIGMA RESEARCH THEMES ARE DIRECTLY RELEVANT TO THE MAJOR TOPICS  SYNTHESIS AND FUNCTION OF MACROMOLECULES IN THE ORIGIN OF LIFE    EARLY LIFE AND INCREASING COMPLEXITY  AND  CO-EVOLUTION OF LIFE AND THE PHYSICAL ENVIRONMENT  IDENTIFIED IN THE 2015 ASTROBIOLOGY SCIENCE STRATEGY. FURTHERMORE  THE ENIGMA PROGRAM COMPLEMENTS THE NAI CAN 7 TEAMS ON EXPERIMENTAL MICROBIAL GENOMICS (GEORGIA TECH) AND LIFE ON A DYNAMIC EARLY EARTH (UC RIVERSIDE). THE RESULTS OF THE ENIGMA RESEARCH PROGRAM WILL HELP THE NAI ANSWER A CENTRAL QUESTION IN ASTROBIOLOGY: HOW DID PROTEINS EVOLVE TO BECOME THE CATALYSTS OF LIFE ON EARTH?","funder_award_id":"80NSSC18M0093","funder_id":"https://openalex.org/F4320306101","funder_display_name":"National Aeronautics and Space Administration"}],"funders":[{"id":"https://openalex.org/F4320306101","display_name":"National Aeronautics and Space Administration","ror":"https://ror.org/027ka1x80"},{"id":"https://openalex.org/F4320322037","display_name":"Nuclear Safety and Security Commission","ror":"https://ror.org/05qk3ge34"},{"id":"https://openalex.org/F4320331934","display_name":"NASA Astrobiology Institute","ror":"https://ror.org/01qxmdg18"},{"id":"https://openalex.org/F4320334812","display_name":"Comisi\u00f3n Nacional de Investigaci\u00f3n Cient\u00edfica y Tecnol\u00f3gica","ror":"https://ror.org/02ap3w078"}],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W2950560741.pdf","grobid_xml":"https://content.openalex.org/works/W2950560741.grobid-xml"},"referenced_works_count":64,"referenced_works":["https://openalex.org/W1537712541","https://openalex.org/W1837722206","https://openalex.org/W1861406683","https://openalex.org/W1930635754","https://openalex.org/W1971565000","https://openalex.org/W1975353473","https://openalex.org/W1979762151","https://openalex.org/W1984148408","https://openalex.org/W1992613346","https://openalex.org/W1995972219","https://openalex.org/W1997005300","https://openalex.org/W1997325518","https://openalex.org/W2001438084","https://openalex.org/W2008545402","https://openalex.org/W2013979082","https://openalex.org/W2016666153","https://openalex.org/W2018983082","https://openalex.org/W2021769301","https://openalex.org/W2030100088","https://openalex.org/W2032558547","https://openalex.org/W2051545676","https://openalex.org/W2061042699","https://openalex.org/W2072843928","https://openalex.org/W2081515402","https://openalex.org/W2092803887","https://openalex.org/W2094795801","https://openalex.org/W2109839728","https://openalex.org/W2120043042","https://openalex.org/W2126189887","https://openalex.org/W2135083016","https://openalex.org/W2137566700","https://openalex.org/W2141885858","https://openalex.org/W2144729032","https://openalex.org/W2147526198","https://openalex.org/W2150300758","https://openalex.org/W2151907070","https://openalex.org/W2153724411","https://openalex.org/W2154139219","https://openalex.org/W2164208203","https://openalex.org/W2169239268","https://openalex.org/W2395745621","https://openalex.org/W2416642098","https://openalex.org/W2607008053","https://openalex.org/W2766089502","https://openalex.org/W2766246727","https://openalex.org/W2792977793","https://openalex.org/W2793645414","https://openalex.org/W2938453170","https://openalex.org/W2950546693","https://openalex.org/W2951846698","https://openalex.org/W2952929332","https://openalex.org/W2963640180","https://openalex.org/W3095583226","https://openalex.org/W4206502013","https://openalex.org/W4210400672","https://openalex.org/W4210702584","https://openalex.org/W4233120011","https://openalex.org/W4244953938","https://openalex.org/W4297795096","https://openalex.org/W4297798671","https://openalex.org/W4298861701","https://openalex.org/W4299702438","https://openalex.org/W6681765588","https://openalex.org/W6822614433"],"related_works":["https://openalex.org/W2743276551","https://openalex.org/W2181549235","https://openalex.org/W2036521446","https://openalex.org/W2046212479","https://openalex.org/W2801459458","https://openalex.org/W2110156441","https://openalex.org/W2026660542","https://openalex.org/W3135130381","https://openalex.org/W2053803793","https://openalex.org/W2155238244"],"abstract_inverted_index":{"The":[0],"coding":[1,22,141],"space":[2,23,142],"of":[3,14,24,31,40,48,60,72,75,78,89,94,97,123,126,135,139,143,155,163,167,178],"protein":[4,27,188],"sequences":[5,32],"is":[6],"shaped":[7],"by":[8,12],"evolutionary":[9],"constraints":[10],"set":[11],"requirements":[13],"function":[15],"and":[16,56,129,183],"stability.":[17],"We":[18,54,119],"show":[19],"that":[20,34,106,117],"the":[21,58,90,95,121,136,140,144],"a":[25,147,159,176],"given":[26],"family-the":[28],"total":[29],"number":[30],"in":[33,170],"family-can":[35],"be":[36],"estimated":[37],"using":[38],"models":[39],"maximum":[41],"entropy":[42],"trained":[43],"on":[44,131],"multiple":[45],"sequence":[46,132],"alignments":[47],"naturally":[49],"occuring":[50],"amino":[51,80,83,109],"acid":[52,84,110],"sequences.":[53],"analyzed":[55],"calculated":[57],"size":[59],"three":[61],"abundant":[62],"repeat":[63],"proteins":[64,70],"families,":[65],"whose":[66],"members":[67],"are":[68],"large":[69],"made":[71],"many":[73,151],"repetitions":[74],"conserved":[76],"portions":[77],"\u223c30":[79],"acids.":[81],"While":[82],"conservation":[85],"at":[86,112],"each":[87,181],"position":[88],"alignment":[91],"explains":[92],"most":[93],"reduction":[96],"diversity":[98],"relative":[99],"to":[100],"completely":[101],"random":[102],"sequences,":[103],"we":[104],"found":[105],"correlations":[107],"between":[108],"usage":[111],"different":[113,124],"positions":[114],"significantly":[115],"impact":[116,122],"diversity.":[118,133],"quantified":[120],"types":[125],"correlations,":[127],"functional":[128],"evolutionary,":[130],"Analysis":[134],"detailed":[137],"structure":[138,174],"families":[145],"revealed":[146],"rugged":[148],"landscape,":[149],"with":[150,158],"local":[152],"energy":[153,165],"minima":[154],"varying":[156],"sizes":[157],"hierarchical":[160],"structure,":[161],"reminiscent":[162],"fustrated":[164],"landscapes":[166],"spin":[168],"glass":[169],"physics.":[171],"This":[172],"clustered":[173],"indicates":[175],"multiplicity":[177],"subtypes":[179],"within":[180],"family,":[182],"suggests":[184],"new":[185],"strategies":[186],"for":[187],"design.":[189]},"counts_by_year":[{"year":2026,"cited_by_count":3},{"year":2025,"cited_by_count":4},{"year":2023,"cited_by_count":3},{"year":2022,"cited_by_count":3},{"year":2021,"cited_by_count":5},{"year":2020,"cited_by_count":4},{"year":2019,"cited_by_count":1}],"updated_date":"2026-06-22T08:00:12.763002","created_date":"2025-10-10T00:00:00"}
