{"id":"https://openalex.org/W2796695790","doi":"https://doi.org/10.1093/bioinformatics/bty300","title":"GGRaSP: a R-package for selecting representative genomes using Gaussian mixture models","display_name":"GGRaSP: a R-package for selecting representative genomes using Gaussian mixture models","publication_year":2018,"publication_date":"2018-04-12","ids":{"openalex":"https://openalex.org/W2796695790","doi":"https://doi.org/10.1093/bioinformatics/bty300","mag":"2796695790","pmid":"https://pubmed.ncbi.nlm.nih.gov/29668840"},"language":"en","primary_location":{"id":"doi:10.1093/bioinformatics/bty300","is_oa":true,"landing_page_url":"https://doi.org/10.1093/bioinformatics/bty300","pdf_url":"https://academic.oup.com/bioinformatics/article-pdf/34/17/3032/25702984/bty300.pdf","source":{"id":"https://openalex.org/S52395412","display_name":"Bioinformatics","issn_l":"1367-4803","issn":["1367-4803","1367-4811"],"is_oa":false,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310311648","host_organization_name":"Oxford University Press","host_organization_lineage":["https://openalex.org/P4310311648","https://openalex.org/P4310311647"],"host_organization_lineage_names":["Oxford University Press","University of Oxford"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Bioinformatics","raw_type":"journal-article"},"type":"article","indexed_in":["crossref","doaj","pubmed"],"open_access":{"is_oa":true,"oa_status":"hybrid","oa_url":"https://academic.oup.com/bioinformatics/article-pdf/34/17/3032/25702984/bty300.pdf","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5080871519","display_name":"Thomas H. Clarke","orcid":"https://orcid.org/0000-0002-1916-1029"},"institutions":[{"id":"https://openalex.org/I1301553790","display_name":"J. Craig Venter Institute","ror":"https://ror.org/049r1ts75","country_code":"US","type":"nonprofit","lineage":["https://openalex.org/I1301553790"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Thomas H Clarke","raw_affiliation_strings":["J. Craig Venter Institute, Rockville, MD, USA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"J. Craig Venter Institute, Rockville, MD, USA","institution_ids":["https://openalex.org/I1301553790"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5083686393","display_name":"Lauren Brinkac","orcid":"https://orcid.org/0000-0002-7549-7280"},"institutions":[{"id":"https://openalex.org/I1301553790","display_name":"J. Craig Venter Institute","ror":"https://ror.org/049r1ts75","country_code":"US","type":"nonprofit","lineage":["https://openalex.org/I1301553790"]},{"id":"https://openalex.org/I177181097","display_name":"Durban University of Technology","ror":"https://ror.org/0303y7a51","country_code":"ZA","type":"education","lineage":["https://openalex.org/I177181097"]}],"countries":["US","ZA"],"is_corresponding":false,"raw_author_name":"Lauren M Brinkac","raw_affiliation_strings":["Department of Biotechnology and Food Technology, Durban University of Technology, Durban, South Africa","J. Craig Venter Institute, Rockville, MD, USA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Department of Biotechnology and Food Technology, Durban University of Technology, Durban, South Africa","institution_ids":["https://openalex.org/I177181097"]},{"raw_affiliation_string":"J. Craig Venter Institute, Rockville, MD, USA","institution_ids":["https://openalex.org/I1301553790"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5111603387","display_name":"Granger Sutton","orcid":null},"institutions":[{"id":"https://openalex.org/I1301553790","display_name":"J. Craig Venter Institute","ror":"https://ror.org/049r1ts75","country_code":"US","type":"nonprofit","lineage":["https://openalex.org/I1301553790"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Granger Sutton","raw_affiliation_strings":["J. Craig Venter Institute, Rockville, MD, USA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"J. Craig Venter Institute, Rockville, MD, USA","institution_ids":["https://openalex.org/I1301553790"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5064828949","display_name":"Derrick E. Fouts","orcid":"https://orcid.org/0000-0003-4323-7668"},"institutions":[{"id":"https://openalex.org/I1301553790","display_name":"J. Craig Venter Institute","ror":"https://ror.org/049r1ts75","country_code":"US","type":"nonprofit","lineage":["https://openalex.org/I1301553790"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Derrick E Fouts","raw_affiliation_strings":["J. Craig Venter Institute, Rockville, MD, USA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"J. Craig Venter Institute, Rockville, MD, USA","institution_ids":["https://openalex.org/I1301553790"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5080871519"],"corresponding_institution_ids":["https://openalex.org/I1301553790"],"apc_list":{"value":3618,"currency":"USD","value_usd":3618},"apc_paid":{"value":3618,"currency":"USD","value_usd":3618},"fwci":2.2422,"has_fulltext":true,"cited_by_count":20,"citation_normalized_percentile":{"value":0.8848055,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":97},"biblio":{"volume":"34","issue":"17","first_page":"3032","last_page":"3034"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T13664","display_name":"Genome Rearrangement Algorithms","score":0.791100025177002,"subfield":{"id":"https://openalex.org/subfields/1311","display_name":"Genetics"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},"topics":[{"id":"https://openalex.org/T13664","display_name":"Genome Rearrangement Algorithms","score":0.791100025177002,"subfield":{"id":"https://openalex.org/subfields/1311","display_name":"Genetics"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},{"id":"https://openalex.org/T10015","display_name":"Genomics and Phylogenetic Studies","score":0.08389999717473984,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},{"id":"https://openalex.org/T11901","display_name":"Bayesian Methods and Mixture Models","score":0.03189999982714653,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/genome","display_name":"Genome","score":0.8092052936553955},{"id":"https://openalex.org/keywords/cluster-analysis","display_name":"Cluster analysis","score":0.5765214562416077},{"id":"https://openalex.org/keywords/selection","display_name":"Selection (genetic algorithm)","score":0.5050192475318909},{"id":"https://openalex.org/keywords/computational-biology","display_name":"Computational biology","score":0.4936617314815521},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.49361932277679443},{"id":"https://openalex.org/keywords/biology","display_name":"Biology","score":0.4409831166267395},{"id":"https://openalex.org/keywords/bacterial-genome-size","display_name":"Bacterial genome size","score":0.42906326055526733},{"id":"https://openalex.org/keywords/r-package","display_name":"R package","score":0.425091952085495},{"id":"https://openalex.org/keywords/genomics","display_name":"Genomics","score":0.411479651927948},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.41057533025741577},{"id":"https://openalex.org/keywords/genetics","display_name":"Genetics","score":0.28862708806991577},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.21217480301856995},{"id":"https://openalex.org/keywords/gene","display_name":"Gene","score":0.09595531225204468}],"concepts":[{"id":"https://openalex.org/C141231307","wikidata":"https://www.wikidata.org/wiki/Q7020","display_name":"Genome","level":3,"score":0.8092052936553955},{"id":"https://openalex.org/C73555534","wikidata":"https://www.wikidata.org/wiki/Q622825","display_name":"Cluster analysis","level":2,"score":0.5765214562416077},{"id":"https://openalex.org/C81917197","wikidata":"https://www.wikidata.org/wiki/Q628760","display_name":"Selection (genetic algorithm)","level":2,"score":0.5050192475318909},{"id":"https://openalex.org/C70721500","wikidata":"https://www.wikidata.org/wiki/Q177005","display_name":"Computational biology","level":1,"score":0.4936617314815521},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.49361932277679443},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.4409831166267395},{"id":"https://openalex.org/C3742359","wikidata":"https://www.wikidata.org/wiki/Q4839988","display_name":"Bacterial genome size","level":4,"score":0.42906326055526733},{"id":"https://openalex.org/C2984074130","wikidata":"https://www.wikidata.org/wiki/Q73539779","display_name":"R package","level":2,"score":0.425091952085495},{"id":"https://openalex.org/C189206191","wikidata":"https://www.wikidata.org/wiki/Q222046","display_name":"Genomics","level":4,"score":0.411479651927948},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.41057533025741577},{"id":"https://openalex.org/C54355233","wikidata":"https://www.wikidata.org/wiki/Q7162","display_name":"Genetics","level":1,"score":0.28862708806991577},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.21217480301856995},{"id":"https://openalex.org/C104317684","wikidata":"https://www.wikidata.org/wiki/Q7187","display_name":"Gene","level":2,"score":0.09595531225204468},{"id":"https://openalex.org/C459310","wikidata":"https://www.wikidata.org/wiki/Q117801","display_name":"Computational science","level":1,"score":0.0}],"mesh":[{"descriptor_ui":"D012984","descriptor_name":"Software","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D012984","descriptor_name":"Software","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D012984","descriptor_name":"Software","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D016000","descriptor_name":"Cluster Analysis","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D016000","descriptor_name":"Cluster Analysis","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D016000","descriptor_name":"Cluster Analysis","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D016011","descriptor_name":"Normal Distribution","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D016011","descriptor_name":"Normal Distribution","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D016011","descriptor_name":"Normal Distribution","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D016678","descriptor_name":"Genome","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D016678","descriptor_name":"Genome","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D016678","descriptor_name":"Genome","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D023281","descriptor_name":"Genomics","qualifier_ui":"Q000379","qualifier_name":"methods","is_major_topic":false},{"descriptor_ui":"D023281","descriptor_name":"Genomics","qualifier_ui":"Q000379","qualifier_name":"methods","is_major_topic":false},{"descriptor_ui":"D023281","descriptor_name":"Genomics","qualifier_ui":"Q000379","qualifier_name":"methods","is_major_topic":false}],"locations_count":4,"locations":[{"id":"doi:10.1093/bioinformatics/bty300","is_oa":true,"landing_page_url":"https://doi.org/10.1093/bioinformatics/bty300","pdf_url":"https://academic.oup.com/bioinformatics/article-pdf/34/17/3032/25702984/bty300.pdf","source":{"id":"https://openalex.org/S52395412","display_name":"Bioinformatics","issn_l":"1367-4803","issn":["1367-4803","1367-4811"],"is_oa":false,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310311648","host_organization_name":"Oxford University Press","host_organization_lineage":["https://openalex.org/P4310311648","https://openalex.org/P4310311647"],"host_organization_lineage_names":["Oxford University Press","University of Oxford"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Bioinformatics","raw_type":"journal-article"},{"id":"pmid:29668840","is_oa":false,"landing_page_url":"https://pubmed.ncbi.nlm.nih.gov/29668840","pdf_url":null,"source":{"id":"https://openalex.org/S4306525036","display_name":"PubMed","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1299303238","host_organization_name":"National Institutes of Health","host_organization_lineage":["https://openalex.org/I1299303238"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Bioinformatics (Oxford, England)","raw_type":null},{"id":"pmh:oai:europepmc.org:5083554","is_oa":true,"landing_page_url":"http://europepmc.org/pmc/articles/PMC6129299","pdf_url":null,"source":{"id":"https://openalex.org/S4306400806","display_name":"Europe PMC (PubMed Central)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1303153112","host_organization_name":"European Bioinformatics Institute","host_organization_lineage":["https://openalex.org/I1303153112"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"Text"},{"id":"pmh:oai:pubmedcentral.nih.gov:6129299","is_oa":true,"landing_page_url":"https://www.ncbi.nlm.nih.gov/pmc/articles/6129299","pdf_url":null,"source":{"id":"https://openalex.org/S2764455111","display_name":"PubMed Central","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1299303238","host_organization_name":"National Institutes of Health","host_organization_lineage":["https://openalex.org/I1299303238"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Bioinformatics","raw_type":"Text"}],"best_oa_location":{"id":"doi:10.1093/bioinformatics/bty300","is_oa":true,"landing_page_url":"https://doi.org/10.1093/bioinformatics/bty300","pdf_url":"https://academic.oup.com/bioinformatics/article-pdf/34/17/3032/25702984/bty300.pdf","source":{"id":"https://openalex.org/S52395412","display_name":"Bioinformatics","issn_l":"1367-4803","issn":["1367-4803","1367-4811"],"is_oa":false,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310311648","host_organization_name":"Oxford University Press","host_organization_lineage":["https://openalex.org/P4310311648","https://openalex.org/P4310311647"],"host_organization_lineage_names":["Oxford University Press","University of Oxford"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Bioinformatics","raw_type":"journal-article"},"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G1177099277","display_name":null,"funder_award_id":"U19AI110819","funder_id":"https://openalex.org/F4320306085","funder_display_name":"U.S. Department of Health and Human Services"},{"id":"https://openalex.org/G1198226865","display_name":null,"funder_award_id":"U19AI110819","funder_id":"https://openalex.org/F4320337355","funder_display_name":"National Institute of Allergy and Infectious Diseases"},{"id":"https://openalex.org/G1460674106","display_name":null,"funder_award_id":"U19AI110819","funder_id":"https://openalex.org/F4320332161","funder_display_name":"National Institutes of Health"},{"id":"https://openalex.org/G2448703573","display_name":null,"funder_award_id":"U19 AI110819","funder_id":"https://openalex.org/F4320337355","funder_display_name":"National Institute of Allergy and Infectious Diseases"}],"funders":[{"id":"https://openalex.org/F4320306085","display_name":"U.S. Department of Health and Human Services","ror":"https://ror.org/033jnv181"},{"id":"https://openalex.org/F4320332161","display_name":"National Institutes of Health","ror":"https://ror.org/01cwqze88"},{"id":"https://openalex.org/F4320337355","display_name":"National Institute of Allergy and Infectious Diseases","ror":"https://ror.org/043z4tv69"}],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W2796695790.pdf","grobid_xml":"https://content.openalex.org/works/W2796695790.grobid-xml"},"referenced_works_count":15,"referenced_works":["https://openalex.org/W1661708608","https://openalex.org/W1735309556","https://openalex.org/W1892179103","https://openalex.org/W2103901746","https://openalex.org/W2120187241","https://openalex.org/W2121628067","https://openalex.org/W2151409320","https://openalex.org/W2169335408","https://openalex.org/W2336330109","https://openalex.org/W2514474978","https://openalex.org/W2563724683","https://openalex.org/W2580518756","https://openalex.org/W2581142702","https://openalex.org/W2950150251","https://openalex.org/W2952935047"],"related_works":["https://openalex.org/W2340204314","https://openalex.org/W2238553258","https://openalex.org/W2606743538","https://openalex.org/W2365831248","https://openalex.org/W2921288124","https://openalex.org/W3011678041","https://openalex.org/W2093610003","https://openalex.org/W2101115907","https://openalex.org/W3177323319","https://openalex.org/W2041267682"],"abstract_inverted_index":{"Motivation:":[0],"The":[1,62,97],"vast":[2],"number":[3],"of":[4,13,35,48,53,77,83,94,124,131,138],"available":[5,157,165],"sequenced":[6],"bacterial":[7],"genomes":[8,78,82,133],"occasionally":[9],"exceeds":[10],"the":[11,36,86,92,106,122],"facilities":[12],"comparative":[14],"genomic":[15,50,107,136],"methods":[16],"or":[17,58],"is":[18,32,156],"dominated":[19],"by":[20,104,126,145,149],"a":[21,27,41,74,110,128,135],"single":[22],"outbreak":[23],"strain,":[24],"and":[25,29,45,148,153],"thus":[26],"diverse":[28],"representative":[30],"subset":[31,38,76],"required.":[33],"Generation":[34],"reduced":[37,75,129],"currently":[39],"requires":[40],"priori":[42],"supervised":[43],"clustering":[44,103],"sequence-only":[46],"selection":[47,144],"medoid":[49],"sequences,":[51],"independent":[52],"any":[54],"additional":[55],"genome":[56,150],"metrics":[57],"strain":[59,147],"attributes.":[60],"Results:":[61],"Gaussian":[63,111],"Genome":[64],"Representative":[65],"Selector":[66],"with":[67],"Prioritization":[68],"(GGRaSP)":[69],"R-package":[70],"described":[71],"below":[72],"generates":[73],"that":[79],"prioritizes":[80],"maintaining":[81],"interest":[84],"to":[85,114],"user":[87],"as":[88,90],"well":[89],"minimizing":[91],"loss":[93],"genetic":[95],"variation.":[96],"package":[98],"also":[99],"allows":[100],"for":[101],"unsupervised":[102],"modeling":[105],"relationships":[108],"using":[109],"mixture":[112],"model":[113],"select":[115],"an":[116],"appropriate":[117],"cluster":[118],"threshold.":[119],"We":[120],"demonstrate":[121],"capabilities":[123],"GGRaSP":[125,155],"generating":[127],"list":[130],"315":[132],"from":[134],"dataset":[137],"4600":[139],"Escherichia":[140],"coli":[141],"genomes,":[142],"prioritizing":[143],"type":[146],"completeness.":[151],"Availability":[152],"implementaion:":[154],"at":[158,166],"https://github.com/JCVenterInstitute/ggrasp/.":[159],"Supplementary":[160,162],"information:":[161],"data":[163],"are":[164],"Bioinformatics":[167],"online.":[168]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":2},{"year":2024,"cited_by_count":2},{"year":2022,"cited_by_count":3},{"year":2021,"cited_by_count":4},{"year":2020,"cited_by_count":4},{"year":2019,"cited_by_count":3},{"year":2018,"cited_by_count":1}],"updated_date":"2026-07-02T09:51:11.867554","created_date":"2025-10-10T00:00:00"}
