{"id":"https://openalex.org/W1994456450","doi":"https://doi.org/10.1186/1471-2105-9-182","title":"Gene identification and protein classification in microbial metagenomic sequence data via incremental clustering","display_name":"Gene identification and protein classification in microbial metagenomic sequence data via incremental clustering","publication_year":2008,"publication_date":"2008-04-10","ids":{"openalex":"https://openalex.org/W1994456450","doi":"https://doi.org/10.1186/1471-2105-9-182","mag":"1994456450","pmid":"https://pubmed.ncbi.nlm.nih.gov/18402669"},"language":"en","primary_location":{"id":"doi:10.1186/1471-2105-9-182","is_oa":true,"landing_page_url":"https://doi.org/10.1186/1471-2105-9-182","pdf_url":"https://bmcbioinformatics.biomedcentral.com/counter/pdf/10.1186/1471-2105-9-182","source":{"id":"https://openalex.org/S19032547","display_name":"BMC Bioinformatics","issn_l":"1471-2105","issn":["1471-2105"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310320256","host_organization_name":"BioMed Central","host_organization_lineage":["https://openalex.org/P4310320256","https://openalex.org/P4310319965"],"host_organization_lineage_names":["BioMed Central","Springer Nature"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"BMC Bioinformatics","raw_type":"journal-article"},"type":"article","indexed_in":["crossref","doaj","pubmed"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://bmcbioinformatics.biomedcentral.com/counter/pdf/10.1186/1471-2105-9-182","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5015513138","display_name":"Shibu Yooseph","orcid":"https://orcid.org/0000-0001-5581-5002"},"institutions":[{"id":"https://openalex.org/I1301553790","display_name":"J. Craig Venter Institute","ror":"https://ror.org/049r1ts75","country_code":"US","type":"nonprofit","lineage":["https://openalex.org/I1301553790"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Shibu Yooseph","raw_affiliation_strings":["J, Craig Venter Institute, 9704 Medical Center Drive, Rockville, MD 20850, USA. syooseph@venterinstitute.org","J. Craig Venter Institute,Rockville,USA"],"affiliations":[{"raw_affiliation_string":"J, Craig Venter Institute, 9704 Medical Center Drive, Rockville, MD 20850, USA. syooseph@venterinstitute.org","institution_ids":["https://openalex.org/I1301553790"]},{"raw_affiliation_string":"J. Craig Venter Institute,Rockville,USA","institution_ids":["https://openalex.org/I1301553790"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5027145422","display_name":"Weizhong Li","orcid":"https://orcid.org/0000-0003-1804-9403"},"institutions":[{"id":"https://openalex.org/I36258959","display_name":"University of California, San Diego","ror":"https://ror.org/0168r3w48","country_code":"US","type":"education","lineage":["https://openalex.org/I36258959"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Weizhong Li","raw_affiliation_strings":["California Institute for Telecommunications and Information Technology, University of California, San Diego, 9500 Gilman Drive, La Jolla, CA, 92093, USA","California Institute for Telecommunications and Information Technology, University of California, San Diego, La Jolla, USA"],"affiliations":[{"raw_affiliation_string":"California Institute for Telecommunications and Information Technology, University of California, San Diego, 9500 Gilman Drive, La Jolla, CA, 92093, USA","institution_ids":["https://openalex.org/I36258959"]},{"raw_affiliation_string":"California Institute for Telecommunications and Information Technology, University of California, San Diego, La Jolla, USA","institution_ids":["https://openalex.org/I36258959"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5111603387","display_name":"Granger Sutton","orcid":null},"institutions":[{"id":"https://openalex.org/I1301553790","display_name":"J. Craig Venter Institute","ror":"https://ror.org/049r1ts75","country_code":"US","type":"nonprofit","lineage":["https://openalex.org/I1301553790"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Granger Sutton","raw_affiliation_strings":["J. Craig Venter Institute, 9704 Medical Center Drive, Rockville, MD, 20850, USA","J. Craig Venter Institute,Rockville,USA"],"affiliations":[{"raw_affiliation_string":"J. Craig Venter Institute, 9704 Medical Center Drive, Rockville, MD, 20850, USA","institution_ids":["https://openalex.org/I1301553790"]},{"raw_affiliation_string":"J. Craig Venter Institute,Rockville,USA","institution_ids":["https://openalex.org/I1301553790"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5015513138"],"corresponding_institution_ids":["https://openalex.org/I1301553790"],"apc_list":{"value":1690,"currency":"GBP","value_usd":2072},"apc_paid":{"value":1690,"currency":"GBP","value_usd":2072},"fwci":3.1707,"has_fulltext":true,"cited_by_count":50,"citation_normalized_percentile":{"value":0.9190838,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":89,"max":99},"biblio":{"volume":"9","issue":"1","first_page":"182","last_page":"182"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10015","display_name":"Genomics and Phylogenetic Studies","score":0.9994999766349792,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},"topics":[{"id":"https://openalex.org/T10015","display_name":"Genomics and Phylogenetic Studies","score":0.9994999766349792,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},{"id":"https://openalex.org/T11048","display_name":"Bacteriophages and microbial interactions","score":0.996999979019165,"subfield":{"id":"https://openalex.org/subfields/2303","display_name":"Ecology"},"field":{"id":"https://openalex.org/fields/23","display_name":"Environmental Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12254","display_name":"Machine Learning in Bioinformatics","score":0.9957000017166138,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/metagenomics","display_name":"Metagenomics","score":0.9241372346878052},{"id":"https://openalex.org/keywords/cluster-analysis","display_name":"Cluster analysis","score":0.7596125602722168},{"id":"https://openalex.org/keywords/computational-biology","display_name":"Computational biology","score":0.6231354475021362},{"id":"https://openalex.org/keywords/identification","display_name":"Identification (biology)","score":0.5683905482292175},{"id":"https://openalex.org/keywords/biology","display_name":"Biology","score":0.5168819427490234},{"id":"https://openalex.org/keywords/gene","display_name":"Gene","score":0.49587926268577576},{"id":"https://openalex.org/keywords/protein-sequencing","display_name":"Protein sequencing","score":0.4464412331581116},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.37661126255989075},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.3582385778427124},{"id":"https://openalex.org/keywords/genetics","display_name":"Genetics","score":0.30312228202819824},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.24827921390533447},{"id":"https://openalex.org/keywords/peptide-sequence","display_name":"Peptide sequence","score":0.12155717611312866}],"concepts":[{"id":"https://openalex.org/C15151743","wikidata":"https://www.wikidata.org/wiki/Q903778","display_name":"Metagenomics","level":3,"score":0.9241372346878052},{"id":"https://openalex.org/C73555534","wikidata":"https://www.wikidata.org/wiki/Q622825","display_name":"Cluster analysis","level":2,"score":0.7596125602722168},{"id":"https://openalex.org/C70721500","wikidata":"https://www.wikidata.org/wiki/Q177005","display_name":"Computational biology","level":1,"score":0.6231354475021362},{"id":"https://openalex.org/C116834253","wikidata":"https://www.wikidata.org/wiki/Q2039217","display_name":"Identification (biology)","level":2,"score":0.5683905482292175},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.5168819427490234},{"id":"https://openalex.org/C104317684","wikidata":"https://www.wikidata.org/wiki/Q7187","display_name":"Gene","level":2,"score":0.49587926268577576},{"id":"https://openalex.org/C10010492","wikidata":"https://www.wikidata.org/wiki/Q3142557","display_name":"Protein sequencing","level":4,"score":0.4464412331581116},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.37661126255989075},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.3582385778427124},{"id":"https://openalex.org/C54355233","wikidata":"https://www.wikidata.org/wiki/Q7162","display_name":"Genetics","level":1,"score":0.30312228202819824},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.24827921390533447},{"id":"https://openalex.org/C167625842","wikidata":"https://www.wikidata.org/wiki/Q899763","display_name":"Peptide sequence","level":3,"score":0.12155717611312866},{"id":"https://openalex.org/C59822182","wikidata":"https://www.wikidata.org/wiki/Q441","display_name":"Botany","level":1,"score":0.0}],"mesh":[{"descriptor_ui":"D000465","descriptor_name":"Algorithms","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D000465","descriptor_name":"Algorithms","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D000465","descriptor_name":"Algorithms","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D001426","descriptor_name":"Bacterial Proteins","qualifier_ui":"Q000235","qualifier_name":"genetics","is_major_topic":false},{"descriptor_ui":"D001426","descriptor_name":"Bacterial Proteins","qualifier_ui":"Q000235","qualifier_name":"genetics","is_major_topic":false},{"descriptor_ui":"D001426","descriptor_name":"Bacterial Proteins","qualifier_ui":"Q000235","qualifier_name":"genetics","is_major_topic":false},{"descriptor_ui":"D001483","descriptor_name":"Base Sequence","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D001483","descriptor_name":"Base Sequence","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D001483","descriptor_name":"Base Sequence","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D002874","descriptor_name":"Chromosome Mapping","qualifier_ui":"Q000379","qualifier_name":"methods","is_major_topic":false},{"descriptor_ui":"D002874","descriptor_name":"Chromosome Mapping","qualifier_ui":"Q000379","qualifier_name":"methods","is_major_topic":false},{"descriptor_ui":"D002874","descriptor_name":"Chromosome Mapping","qualifier_ui":"Q000379","qualifier_name":"methods","is_major_topic":false},{"descriptor_ui":"D005810","descriptor_name":"Multigene Family","qualifier_ui":"Q000235","qualifier_name":"genetics","is_major_topic":false},{"descriptor_ui":"D005810","descriptor_name":"Multigene Family","qualifier_ui":"Q000235","qualifier_name":"genetics","is_major_topic":false},{"descriptor_ui":"D005810","descriptor_name":"Multigene Family","qualifier_ui":"Q000235","qualifier_name":"genetics","is_major_topic":false},{"descriptor_ui":"D008969","descriptor_name":"Molecular Sequence Data","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D008969","descriptor_name":"Molecular Sequence Data","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D008969","descriptor_name":"Molecular Sequence Data","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D010363","descriptor_name":"Pattern Recognition, Automated","qualifier_ui":"Q000379","qualifier_name":"methods","is_major_topic":false},{"descriptor_ui":"D010363","descriptor_name":"Pattern Recognition, Automated","qualifier_ui":"Q000379","qualifier_name":"methods","is_major_topic":false},{"descriptor_ui":"D010363","descriptor_name":"Pattern Recognition, Automated","qualifier_ui":"Q000379","qualifier_name":"methods","is_major_topic":false},{"descriptor_ui":"D016000","descriptor_name":"Cluster Analysis","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D016000","descriptor_name":"Cluster Analysis","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D016000","descriptor_name":"Cluster Analysis","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D016366","descriptor_name":"Open Reading Frames","qualifier_ui":"Q000235","qualifier_name":"genetics","is_major_topic":false},{"descriptor_ui":"D016366","descriptor_name":"Open Reading Frames","qualifier_ui":"Q000235","qualifier_name":"genetics","is_major_topic":false},{"descriptor_ui":"D016366","descriptor_name":"Open Reading Frames","qualifier_ui":"Q000235","qualifier_name":"genetics","is_major_topic":false},{"descriptor_ui":"D017422","descriptor_name":"Sequence Analysis, DNA","qualifier_ui":"Q000379","qualifier_name":"methods","is_major_topic":false},{"descriptor_ui":"D017422","descriptor_name":"Sequence Analysis, DNA","qualifier_ui":"Q000379","qualifier_name":"methods","is_major_topic":false},{"descriptor_ui":"D017422","descriptor_name":"Sequence Analysis, DNA","qualifier_ui":"Q000379","qualifier_name":"methods","is_major_topic":false}],"locations_count":4,"locations":[{"id":"doi:10.1186/1471-2105-9-182","is_oa":true,"landing_page_url":"https://doi.org/10.1186/1471-2105-9-182","pdf_url":"https://bmcbioinformatics.biomedcentral.com/counter/pdf/10.1186/1471-2105-9-182","source":{"id":"https://openalex.org/S19032547","display_name":"BMC Bioinformatics","issn_l":"1471-2105","issn":["1471-2105"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310320256","host_organization_name":"BioMed Central","host_organization_lineage":["https://openalex.org/P4310320256","https://openalex.org/P4310319965"],"host_organization_lineage_names":["BioMed Central","Springer Nature"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"BMC Bioinformatics","raw_type":"journal-article"},{"id":"pmid:18402669","is_oa":false,"landing_page_url":"https://pubmed.ncbi.nlm.nih.gov/18402669","pdf_url":null,"source":{"id":"https://openalex.org/S4306525036","display_name":"PubMed","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1299303238","host_organization_name":"National Institutes of Health","host_organization_lineage":["https://openalex.org/I1299303238"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"BMC bioinformatics","raw_type":null},{"id":"pmh:oai:doaj.org/article:193c2270eb0349bc986447157dbcea0b","is_oa":true,"landing_page_url":"https://doaj.org/article/193c2270eb0349bc986447157dbcea0b","pdf_url":null,"source":{"id":"https://openalex.org/S112646816","display_name":"SHILAP Revista de lepidopterolog\u00eda","issn_l":"0300-5267","issn":["0300-5267","2340-4078"],"is_oa":true,"is_in_doaj":true,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"journal"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"BMC Bioinformatics, Vol 9, Iss 1, p 182 (2008)","raw_type":"article"},{"id":"pmh:oai:pubmedcentral.nih.gov:2362130","is_oa":true,"landing_page_url":"https://www.ncbi.nlm.nih.gov/pmc/articles/2362130","pdf_url":null,"source":{"id":"https://openalex.org/S2764455111","display_name":"PubMed Central","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1299303238","host_organization_name":"National Institutes of Health","host_organization_lineage":["https://openalex.org/I1299303238"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"BMC Bioinformatics","raw_type":"Text"}],"best_oa_location":{"id":"doi:10.1186/1471-2105-9-182","is_oa":true,"landing_page_url":"https://doi.org/10.1186/1471-2105-9-182","pdf_url":"https://bmcbioinformatics.biomedcentral.com/counter/pdf/10.1186/1471-2105-9-182","source":{"id":"https://openalex.org/S19032547","display_name":"BMC Bioinformatics","issn_l":"1471-2105","issn":["1471-2105"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310320256","host_organization_name":"BioMed Central","host_organization_lineage":["https://openalex.org/P4310320256","https://openalex.org/P4310319965"],"host_organization_lineage_names":["BioMed Central","Springer Nature"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"BMC Bioinformatics","raw_type":"journal-article"},"sustainable_development_goals":[],"awards":[],"funders":[{"id":"https://openalex.org/F4320306202","display_name":"Gordon and Betty Moore Foundation","ror":"https://ror.org/006wxqw41"}],"has_content":{"pdf":true,"grobid_xml":true},"content_urls":{"pdf":"https://content.openalex.org/works/W1994456450.pdf","grobid_xml":"https://content.openalex.org/works/W1994456450.grobid-xml"},"referenced_works_count":48,"referenced_works":["https://openalex.org/W1791999417","https://openalex.org/W1930561973","https://openalex.org/W1971921744","https://openalex.org/W1974383394","https://openalex.org/W2001660787","https://openalex.org/W2021380741","https://openalex.org/W2036662864","https://openalex.org/W2042893247","https://openalex.org/W2055043387","https://openalex.org/W2063573248","https://openalex.org/W2085277871","https://openalex.org/W2093830129","https://openalex.org/W2096525273","https://openalex.org/W2098290178","https://openalex.org/W2108067237","https://openalex.org/W2108211735","https://openalex.org/W2111373249","https://openalex.org/W2113350312","https://openalex.org/W2113601822","https://openalex.org/W2114200698","https://openalex.org/W2119632499","https://openalex.org/W2124637227","https://openalex.org/W2126809954","https://openalex.org/W2128114769","https://openalex.org/W2132415967","https://openalex.org/W2132926880","https://openalex.org/W2135083016","https://openalex.org/W2138270253","https://openalex.org/W2140960177","https://openalex.org/W2141885858","https://openalex.org/W2143235667","https://openalex.org/W2145336165","https://openalex.org/W2147667050","https://openalex.org/W2150337627","https://openalex.org/W2152873040","https://openalex.org/W2156125289","https://openalex.org/W2156701707","https://openalex.org/W2158714788","https://openalex.org/W2160364329","https://openalex.org/W2160582562","https://openalex.org/W2161794223","https://openalex.org/W2164429509","https://openalex.org/W2625137958","https://openalex.org/W4210400672","https://openalex.org/W4210580032","https://openalex.org/W4245543257","https://openalex.org/W4256395558","https://openalex.org/W4285719527"],"related_works":["https://openalex.org/W638577851","https://openalex.org/W2968354375","https://openalex.org/W2359440920","https://openalex.org/W2889550857","https://openalex.org/W3013372948","https://openalex.org/W3135997498","https://openalex.org/W4242417330","https://openalex.org/W3198631238","https://openalex.org/W4388282505","https://openalex.org/W3044162010"],"abstract_inverted_index":{"The":[0,18],"clustering":[1,20],"paradigm":[2],"is":[3,22],"shown":[4,23],"to":[5,24],"be":[6,25],"a":[7,51,57],"very":[8],"useful":[9],"tool":[10],"in":[11,32,50],"the":[12,29],"analysis":[13],"of":[14,62],"microbial":[15],"metagenomic":[16,52],"data.":[17],"incremental":[19],"method":[21],"much":[26],"faster":[27],"than":[28],"original":[30],"approach":[31],"identifying":[33,43],"genes,":[34],"grouping":[35],"sequences":[36],"into":[37],"existing":[38],"protein":[39,63],"families,":[40],"and":[41],"also":[42],"novel":[44],"families":[45],"that":[46],"have":[47],"multiple":[48],"members":[49],"dataset.":[53],"These":[54],"clusters":[55],"provide":[56],"basis":[58],"for":[59],"further":[60],"studies":[61],"families.":[64]},"counts_by_year":[{"year":2022,"cited_by_count":1},{"year":2021,"cited_by_count":2},{"year":2020,"cited_by_count":1},{"year":2019,"cited_by_count":1},{"year":2018,"cited_by_count":2},{"year":2015,"cited_by_count":5},{"year":2014,"cited_by_count":4},{"year":2013,"cited_by_count":5},{"year":2012,"cited_by_count":6}],"updated_date":"2026-03-20T23:20:44.827607","created_date":"2025-10-10T00:00:00"}
