{"id":"https://openalex.org/W2692027428","doi":"https://doi.org/10.1093/bioinformatics/btx400","title":"SANDPUMA: ensemble predictions of nonribosomal peptide chemistry reveal biosynthetic diversity across Actinobacteria","display_name":"SANDPUMA: ensemble predictions of nonribosomal peptide chemistry reveal biosynthetic diversity across Actinobacteria","publication_year":2017,"publication_date":"2017-06-19","ids":{"openalex":"https://openalex.org/W2692027428","doi":"https://doi.org/10.1093/bioinformatics/btx400","mag":"2692027428","pmid":"https://pubmed.ncbi.nlm.nih.gov/28633438","pmcid":"https://www.ncbi.nlm.nih.gov/pmc/articles/5860034"},"language":"en","primary_location":{"is_oa":true,"landing_page_url":"https://doi.org/10.1093/bioinformatics/btx400","pdf_url":"https://academic.oup.com/bioinformatics/article-pdf/33/20/3202/25165615/btx400.pdf","source":{"id":"https://openalex.org/S52395412","display_name":"Bioinformatics","issn_l":"1367-4803","issn":["1367-4811","1367-4803"],"is_oa":true,"is_in_doaj":true,"host_organization":"https://openalex.org/P4310311648","host_organization_name":"Oxford University Press","host_organization_lineage":["https://openalex.org/P4310311647","https://openalex.org/P4310311648"],"host_organization_lineage_names":["University of Oxford","Oxford University Press"],"type":"journal"},"license":null,"version":"publishedVersion","is_accepted":true,"is_published":true},"type":"article","type_crossref":"journal-article","indexed_in":["crossref","pubmed"],"open_access":{"is_oa":true,"oa_status":"bronze","oa_url":"https://academic.oup.com/bioinformatics/article-pdf/33/20/3202/25165615/btx400.pdf","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5079313598","display_name":"Marc G. Chevrette","orcid":"https://orcid.org/0000-0002-7209-0717"},"institutions":[{"id":"https://openalex.org/I135310074","display_name":"University of Wisconsin\u2013Madison","ror":"https://ror.org/01y2jtd41","country_code":"US","type":"education","lineage":["https://openalex.org/I1304256225","https://openalex.org/I135310074"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Marc G. Chevrette","raw_affiliation_string":"Department of Genetics, University of Wisconsin-Madison, Madison, WI, USA","raw_affiliation_strings":["Department of Genetics, University of Wisconsin-Madison, Madison, WI, USA"]},{"author_position":"middle","author":{"id":"https://openalex.org/A5059411032","display_name":"Fabian Aicheler","orcid":null},"institutions":[{"id":"https://openalex.org/I8087733","display_name":"University of T\u00fcbingen","ror":"https://ror.org/03a1kwz48","country_code":"DE","type":"education","lineage":["https://openalex.org/I8087733"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Fabian Aicheler","raw_affiliation_string":"Applied Bioinformatics, Department of Computer Science, Quantitative Biology Center and Center for Bioinformatics, University of T\u00fcbingen, T\u00fcbingen, Germany","raw_affiliation_strings":["Applied Bioinformatics, Department of Computer Science, Quantitative Biology Center and Center for Bioinformatics, University of T\u00fcbingen, T\u00fcbingen, Germany"]},{"author_position":"middle","author":{"id":"https://openalex.org/A5053621010","display_name":"Oliver Kohlbacher","orcid":"https://orcid.org/0000-0003-1739-4598"},"institutions":[{"id":"https://openalex.org/I8087733","display_name":"University of T\u00fcbingen","ror":"https://ror.org/03a1kwz48","country_code":"DE","type":"education","lineage":["https://openalex.org/I8087733"]},{"id":"https://openalex.org/I4210112458","display_name":"Max Planck Institute for Developmental Biology","ror":"https://ror.org/022jc0g24","country_code":"DE","type":"facility","lineage":["https://openalex.org/I149899117","https://openalex.org/I4210112458"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Oliver Kohlbacher","raw_affiliation_string":"Applied Bioinformatics, Department of Computer Science, Quantitative Biology Center and Center for Bioinformatics, University of T\u00fcbingen, T\u00fcbingen, Germany; Biomolecular Interactions, Max Planck Institute for Developmental Biology, T\u00fcbingen, Germany","raw_affiliation_strings":["Applied Bioinformatics, Department of Computer Science, Quantitative Biology Center and Center for Bioinformatics, University of T\u00fcbingen, T\u00fcbingen, Germany","Biomolecular Interactions, Max Planck Institute for Developmental Biology, T\u00fcbingen, Germany"]},{"author_position":"middle","author":{"id":"https://openalex.org/A5053720408","display_name":"Cameron R. Currie","orcid":"https://orcid.org/0000-0003-3104-3997"},"institutions":[{"id":"https://openalex.org/I135310074","display_name":"University of Wisconsin\u2013Madison","ror":"https://ror.org/01y2jtd41","country_code":"US","type":"education","lineage":["https://openalex.org/I1304256225","https://openalex.org/I135310074"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Cameron R. Currie","raw_affiliation_string":"Department of Bacteriology and J. F. Crow Institute for the Study of Evolution, University of Wisconsin-Madison, Madison, WI, USA","raw_affiliation_strings":["Department of Bacteriology and J. F. Crow Institute for the Study of Evolution, University of Wisconsin-Madison, Madison, WI, USA"]},{"author_position":"last","author":{"id":"https://openalex.org/A5034845813","display_name":"Marnix H. Medema","orcid":"https://orcid.org/0000-0002-2191-2821"},"institutions":[{"id":"https://openalex.org/I913481162","display_name":"Wageningen University & Research","ror":"https://ror.org/04qw24q55","country_code":"NL","type":"education","lineage":["https://openalex.org/I913481162"]}],"countries":["NL"],"is_corresponding":true,"raw_author_name":"Marnix H. Medema","raw_affiliation_string":"Bioinformatics Group, Wageningen University, Wageningen, The Netherlands","raw_affiliation_strings":["Bioinformatics Group, Wageningen University, Wageningen, The Netherlands"]}],"countries_distinct_count":3,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5079313598","https://openalex.org/A5034845813"],"corresponding_institution_ids":["https://openalex.org/I135310074","https://openalex.org/I913481162"],"apc_list":{"value":3618,"currency":"USD","value_usd":3618,"provenance":"doaj"},"apc_paid":{"value":3618,"currency":"USD","value_usd":3618,"provenance":"doaj"},"has_fulltext":true,"fulltext_origin":"ngrams","cited_by_count":77,"cited_by_percentile_year":{"min":98,"max":99},"biblio":{"volume":"33","issue":"20","first_page":"3202","last_page":"3210"},"is_retracted":false,"is_paratext":false,"primary_topic":{"id":"https://openalex.org/T10252","display_name":"Natural Products as Sources of New Drugs","score":0.9998,"subfield":{"id":"https://openalex.org/subfields/2736","display_name":"Pharmacology"},"field":{"id":"https://openalex.org/fields/27","display_name":"Medicine"},"domain":{"id":"https://openalex.org/domains/4","display_name":"Health Sciences"}},"topics":[{"id":"https://openalex.org/T10252","display_name":"Natural Products as Sources of New Drugs","score":0.9998,"subfield":{"id":"https://openalex.org/subfields/2736","display_name":"Pharmacology"},"field":{"id":"https://openalex.org/fields/27","display_name":"Medicine"},"domain":{"id":"https://openalex.org/domains/4","display_name":"Health Sciences"}},{"id":"https://openalex.org/T10015","display_name":"RNA Sequencing Data Analysis","score":0.9995,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},{"id":"https://openalex.org/T10521","display_name":"Ribosome Structure and Translation Mechanisms","score":0.9951,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}}],"keywords":[{"keyword":"biosynthetic diversity","score":0.4737},{"keyword":"nonribosomal peptide chemistry","score":0.4296}],"concepts":[{"id":"https://openalex.org/C2777379556","wikidata":"https://www.wikidata.org/wiki/Q1443801","display_name":"Nonribosomal peptide","level":4,"score":0.9234488},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.64901423},{"id":"https://openalex.org/C70721500","wikidata":"https://www.wikidata.org/wiki/Q177005","display_name":"Computational biology","level":1,"score":0.52967536},{"id":"https://openalex.org/C2778169975","wikidata":"https://www.wikidata.org/wiki/Q130914","display_name":"Actinobacteria","level":4,"score":0.43268317},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.3468331},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.2869187},{"id":"https://openalex.org/C104317684","wikidata":"https://www.wikidata.org/wiki/Q7187","display_name":"Gene","level":2,"score":0.1934686},{"id":"https://openalex.org/C54355233","wikidata":"https://www.wikidata.org/wiki/Q7162","display_name":"Genetics","level":1,"score":0.14220953},{"id":"https://openalex.org/C553450214","wikidata":"https://www.wikidata.org/wiki/Q851162","display_name":"Biosynthesis","level":3,"score":0.0},{"id":"https://openalex.org/C42062724","wikidata":"https://www.wikidata.org/wiki/Q1209205","display_name":"16S ribosomal RNA","level":3,"score":0.0}],"mesh":[{"descriptor_ui":"D039903","descriptor_name":"Actinobacteria","qualifier_ui":"Q000378","qualifier_name":"metabolism","is_major_topic":true},{"descriptor_ui":"D000465","descriptor_name":"Algorithms","qualifier_ui":"","qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D019295","descriptor_name":"Computational Biology","qualifier_ui":"Q000379","qualifier_name":"methods","is_major_topic":true},{"descriptor_ui":"D010453","descriptor_name":"Peptide Synthases","qualifier_ui":"Q000378","qualifier_name":"metabolism","is_major_topic":true},{"descriptor_ui":"D010455","descriptor_name":"Peptides","qualifier_ui":"Q000378","qualifier_name":"metabolism","is_major_topic":true},{"descriptor_ui":"D020539","descriptor_name":"Sequence Analysis, Protein","qualifier_ui":"Q000379","qualifier_name":"methods","is_major_topic":true},{"descriptor_ui":"D039903","descriptor_name":"Actinobacteria","qualifier_ui":"","qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D039903","descriptor_name":"Actinobacteria","qualifier_ui":"Q000235","qualifier_name":"genetics","is_major_topic":false},{"descriptor_ui":"D039903","descriptor_name":"Actinobacteria","qualifier_ui":"Q000201","qualifier_name":"enzymology","is_major_topic":false},{"descriptor_ui":"D020134","descriptor_name":"Catalytic Domain","qualifier_ui":"","qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D019295","descriptor_name":"Computational Biology","qualifier_ui":"","qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D005810","descriptor_name":"Multigene Family","qualifier_ui":"","qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D010453","descriptor_name":"Peptide Synthases","qualifier_ui":"","qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D010455","descriptor_name":"Peptides","qualifier_ui":"","qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D020539","descriptor_name":"Sequence Analysis, Protein","qualifier_ui":"","qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D012984","descriptor_name":"Software","qualifier_ui":"","qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D013379","descriptor_name":"Substrate Specificity","qualifier_ui":"","qualifier_name":null,"is_major_topic":false}],"locations_count":4,"locations":[{"is_oa":true,"landing_page_url":"https://doi.org/10.1093/bioinformatics/btx400","pdf_url":"https://academic.oup.com/bioinformatics/article-pdf/33/20/3202/25165615/btx400.pdf","source":{"id":"https://openalex.org/S52395412","display_name":"Bioinformatics","issn_l":"1367-4803","issn":["1367-4811","1367-4803"],"is_oa":true,"is_in_doaj":true,"host_organization":"https://openalex.org/P4310311648","host_organization_name":"Oxford University Press","host_organization_lineage":["https://openalex.org/P4310311647","https://openalex.org/P4310311648"],"host_organization_lineage_names":["University of Oxford","Oxford University Press"],"type":"journal"},"license":null,"version":"publishedVersion","is_accepted":true,"is_published":true},{"is_oa":true,"landing_page_url":"https://europepmc.org/articles/pmc5860034","pdf_url":"https://europepmc.org/articles/pmc5860034?pdf=render","source":{"id":"https://openalex.org/S4306400806","display_name":"Europe PMC (PubMed Central)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"host_organization":"https://openalex.org/I1303153112","host_organization_name":"European Bioinformatics Institute","host_organization_lineage":["https://openalex.org/I1303153112"],"host_organization_lineage_names":["European Bioinformatics Institute"],"type":"repository"},"license":null,"version":"publishedVersion","is_accepted":true,"is_published":true},{"is_oa":true,"landing_page_url":"https://www.ncbi.nlm.nih.gov/pmc/articles/PMC5860034","pdf_url":null,"source":{"id":"https://openalex.org/S2764455111","display_name":"PubMed Central","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"host_organization":"https://openalex.org/I1299303238","host_organization_name":"National Institutes of Health","host_organization_lineage":["https://openalex.org/I1299303238"],"host_organization_lineage_names":["National Institutes of Health"],"type":"repository"},"license":null,"version":"publishedVersion","is_accepted":true,"is_published":true},{"is_oa":false,"landing_page_url":"https://pubmed.ncbi.nlm.nih.gov/28633438","pdf_url":null,"source":{"id":"https://openalex.org/S4306525036","display_name":"PubMed","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"host_organization":"https://openalex.org/I1299303238","host_organization_name":"National Institutes of Health","host_organization_lineage":["https://openalex.org/I1299303238"],"host_organization_lineage_names":["National Institutes of Health"],"type":"repository"},"license":null,"version":null,"is_accepted":false,"is_published":false}],"best_oa_location":{"is_oa":true,"landing_page_url":"https://doi.org/10.1093/bioinformatics/btx400","pdf_url":"https://academic.oup.com/bioinformatics/article-pdf/33/20/3202/25165615/btx400.pdf","source":{"id":"https://openalex.org/S52395412","display_name":"Bioinformatics","issn_l":"1367-4803","issn":["1367-4811","1367-4803"],"is_oa":true,"is_in_doaj":true,"host_organization":"https://openalex.org/P4310311648","host_organization_name":"Oxford University Press","host_organization_lineage":["https://openalex.org/P4310311647","https://openalex.org/P4310311648"],"host_organization_lineage_names":["University of Oxford","Oxford University Press"],"type":"journal"},"license":null,"version":"publishedVersion","is_accepted":true,"is_published":true},"sustainable_development_goals":[{"score":0.5,"id":"https://metadata.un.org/sdg/17","display_name":"Partnerships for the goals"}],"grants":[{"funder":"https://openalex.org/F4320332161","funder_display_name":"National Institutes of Health","award_id":"U19 Al109673"}],"referenced_works_count":46,"referenced_works":["https://openalex.org/W1934400729","https://openalex.org/W1957185080","https://openalex.org/W1977385550","https://openalex.org/W1978609583","https://openalex.org/W1982793579","https://openalex.org/W1990935073","https://openalex.org/W2009257824","https://openalex.org/W2013619747","https://openalex.org/W2031611770","https://openalex.org/W2039467082","https://openalex.org/W2041537722","https://openalex.org/W2045204781","https://openalex.org/W2049782367","https://openalex.org/W2058087053","https://openalex.org/W2062185057","https://openalex.org/W2065679186","https://openalex.org/W2073886101","https://openalex.org/W2076660730","https://openalex.org/W2080895748","https://openalex.org/W2108433768","https://openalex.org/W2114839447","https://openalex.org/W2116601594","https://openalex.org/W2126714764","https://openalex.org/W2131887397","https://openalex.org/W2132924229","https://openalex.org/W2135639274","https://openalex.org/W2137015675","https://openalex.org/W2138122982","https://openalex.org/W2139119508","https://openalex.org/W2141052558","https://openalex.org/W2143485490","https://openalex.org/W2160378127","https://openalex.org/W2161394740","https://openalex.org/W2167918062","https://openalex.org/W2187341651","https://openalex.org/W2229450847","https://openalex.org/W2240498000","https://openalex.org/W2300627010","https://openalex.org/W2332821092","https://openalex.org/W2338181866","https://openalex.org/W2412308152","https://openalex.org/W2504691963","https://openalex.org/W2508980186","https://openalex.org/W2543967261","https://openalex.org/W2609551972","https://openalex.org/W4214754383"],"related_works":["https://openalex.org/W2748952813","https://openalex.org/W1523578163","https://openalex.org/W2029300483","https://openalex.org/W2158068445","https://openalex.org/W1510529932","https://openalex.org/W2098442675","https://openalex.org/W4206321595","https://openalex.org/W4205916385","https://openalex.org/W4206053145","https://openalex.org/W2359034609"],"ngrams_url":"https://api.openalex.org/works/W2692027428/ngrams","abstract_inverted_index":{"Abstract":[0],"Summary":[1],"Nonribosomally":[2],"synthesized":[3],"peptides":[4],"(NRPs)":[5],"are":[6,203],"natural":[7],"products":[8],"with":[9,43],"widespread":[10],"applications":[11],"in":[12,47,134],"medicine":[13],"and":[14,39,41,54,167,177,185],"biotechnology.":[15],"Many":[16],"algorithms":[17,89,104],"have":[18,62],"been":[19,155],"developed":[20,110],"to":[21],"predict":[22],"the":[23,77,158,193],"substrate":[24],"specificities":[25],"of":[26,57,79,81,97,121,138],"nonribosomal":[27],"peptide":[28],"synthetase":[29],"adenylation":[30],"(A)":[31],"domains":[32],"from":[33],"DNA":[34],"sequences,":[35,100],"which":[36,74,125],"enables":[37],"prioritization":[38],"dereplication,":[40],"integration":[42],"other":[44],"data":[45,53,202],"types":[46],"discovery":[48],"efforts.":[49],"However,":[50],"insufficient":[51],"training":[52],"a":[55,70,91,112,135,187],"lack":[56],"clarity":[58],"regarding":[59],"prediction":[60],"quality":[61],"impeded":[63],"optimal":[64],"use.":[65],"Here,":[66],"we":[67,109,131],"introduce":[68],"prediCAT,":[69],"new":[71],"phylogenetics-inspired":[72],"algorithm,":[73,115],"quantitatively":[75],"estimates":[76],"degree":[78],"predictability":[80],"each":[82],"A-domain.":[83],"We":[84],"then":[85],"systematically":[86],"benchmarked":[87],"all":[88,122],"on":[90,117],"newly":[92,118],"gathered,":[93],"independent":[94],"test":[95],"set":[96],"434":[98],"A-domain":[99],"showing":[101],"that":[102,143],"active-site-motif-based":[103],"outperform":[105],"whole-domain-based":[106],"methods.":[107,129],"Subsequently,":[108],"SANDPUMA,":[111],"powerful":[113],"ensemble":[114],"based":[116],"trained":[119],"versions":[120],"high-performing":[123],"algorithms,":[124],"significantly":[126],"outperforms":[127],"individual":[128],"Finally,":[130],"deployed":[132],"SANDPUMA":[133,153,179],"systematic":[136],"investigation":[137],"7635":[139],"Actinobacteria":[140],"genomes,":[141],"suggesting":[142],"NRP":[144],"chemical":[145],"diversity":[146],"is":[147,168,180],"much":[148],"higher":[149],"than":[150],"previously":[151],"estimated.":[152],"has":[154],"integrated":[156],"into":[157],"widely":[159],"used":[160],"antiSMASH":[161],"biosynthetic":[162],"gene":[163],"cluster":[164],"analysis":[165],"pipeline":[166],"also":[169],"available":[170,182,204],"as":[171,186],"an":[172],"open-source,":[173],"standalone":[174],"tool.":[175],"Availability":[176],"implementation":[178],"freely":[181],"at":[183,190,205],"https://bitbucket.org/chevrm/sandpuma":[184],"docker":[188],"image":[189],"https://hub.docker.com/r/chevrm/sandpuma/":[191],"under":[192],"GNU":[194],"Public":[195],"License":[196],"3":[197],"(GPL3).":[198],"Supplementary":[199,201],"information":[200],"Bioinformatics":[206],"online.":[207]},"cited_by_api_url":"https://api.openalex.org/works?filter=cites:W2692027428","counts_by_year":[{"year":2023,"cited_by_count":9},{"year":2022,"cited_by_count":11},{"year":2021,"cited_by_count":17},{"year":2020,"cited_by_count":9},{"year":2019,"cited_by_count":16},{"year":2018,"cited_by_count":9},{"year":2017,"cited_by_count":3}],"updated_date":"2024-03-18T01:05:43.153623","created_date":"2017-06-30"}