{"id":"https://openalex.org/W2108166985","doi":"https://doi.org/10.1186/1471-2105-11-187","title":"Artificial and natural duplicates in pyrosequencing reads of metagenomic data","display_name":"Artificial and natural duplicates in pyrosequencing reads of metagenomic data","publication_year":2010,"publication_date":"2010-04-13","ids":{"openalex":"https://openalex.org/W2108166985","doi":"https://doi.org/10.1186/1471-2105-11-187","mag":"2108166985","pmid":"https://pubmed.ncbi.nlm.nih.gov/20388221"},"language":"en","primary_location":{"id":"doi:10.1186/1471-2105-11-187","is_oa":true,"landing_page_url":"https://doi.org/10.1186/1471-2105-11-187","pdf_url":"https://bmcbioinformatics.biomedcentral.com/counter/pdf/10.1186/1471-2105-11-187","source":{"id":"https://openalex.org/S19032547","display_name":"BMC Bioinformatics","issn_l":"1471-2105","issn":["1471-2105"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310320256","host_organization_name":"BioMed Central","host_organization_lineage":["https://openalex.org/P4310320256","https://openalex.org/P4310319965"],"host_organization_lineage_names":["BioMed Central","Springer Nature"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"BMC Bioinformatics","raw_type":"journal-article"},"type":"article","indexed_in":["crossref","doaj","pubmed"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://bmcbioinformatics.biomedcentral.com/counter/pdf/10.1186/1471-2105-11-187","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5008702128","display_name":"Beifang Niu","orcid":"https://orcid.org/0000-0002-7448-7793"},"institutions":[{"id":"https://openalex.org/I36258959","display_name":"University of California, San Diego","ror":"https://ror.org/0168r3w48","country_code":"US","type":"education","lineage":["https://openalex.org/I36258959"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Beifang Niu","raw_affiliation_strings":["California Institute for Telecommunications and Information Technology, University of California San Diego, La Jolla, California 92093, USA","California Institute for Telecommunications and Information Technology, University of California San Diego, La Jolla, California, 92093, USA"],"affiliations":[{"raw_affiliation_string":"California Institute for Telecommunications and Information Technology, University of California San Diego, La Jolla, California 92093, USA","institution_ids":[]},{"raw_affiliation_string":"California Institute for Telecommunications and Information Technology, University of California San Diego, La Jolla, California, 92093, USA","institution_ids":["https://openalex.org/I36258959"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5111860308","display_name":"LiMin Fu","orcid":null},"institutions":[{"id":"https://openalex.org/I36258959","display_name":"University of California, San Diego","ror":"https://ror.org/0168r3w48","country_code":"US","type":"education","lineage":["https://openalex.org/I36258959"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Limin Fu","raw_affiliation_strings":["California Institute for Telecommunications and Information Technology, University of California San Diego, La Jolla, California, 92093, USA"],"affiliations":[{"raw_affiliation_string":"California Institute for Telecommunications and Information Technology, University of California San Diego, La Jolla, California, 92093, USA","institution_ids":["https://openalex.org/I36258959"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5109141637","display_name":"Shulei Sun","orcid":null},"institutions":[{"id":"https://openalex.org/I36258959","display_name":"University of California, San Diego","ror":"https://ror.org/0168r3w48","country_code":"US","type":"education","lineage":["https://openalex.org/I36258959"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Shulei Sun","raw_affiliation_strings":["Center for Research in Biological Systems, University of California San Diego, La Jolla, California, 92093, USA"],"affiliations":[{"raw_affiliation_string":"Center for Research in Biological Systems, University of California San Diego, La Jolla, California, 92093, USA","institution_ids":["https://openalex.org/I36258959"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5027145422","display_name":"Weizhong Li","orcid":"https://orcid.org/0000-0003-1804-9403"},"institutions":[{"id":"https://openalex.org/I36258959","display_name":"University of California, San Diego","ror":"https://ror.org/0168r3w48","country_code":"US","type":"education","lineage":["https://openalex.org/I36258959"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Weizhong Li","raw_affiliation_strings":["California Institute for Telecommunications and Information Technology, University of California San Diego, La Jolla, California, 92093, USA","Center for Research in Biological Systems, University of California San Diego, La Jolla, California, 92093, USA"],"affiliations":[{"raw_affiliation_string":"California Institute for Telecommunications and Information Technology, University of California San Diego, La Jolla, California, 92093, USA","institution_ids":["https://openalex.org/I36258959"]},{"raw_affiliation_string":"Center for Research in Biological Systems, University of California San Diego, La Jolla, California, 92093, USA","institution_ids":["https://openalex.org/I36258959"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5008702128"],"corresponding_institution_ids":["https://openalex.org/I36258959"],"apc_list":{"value":1690,"currency":"GBP","value_usd":2072},"apc_paid":{"value":1690,"currency":"GBP","value_usd":2072},"fwci":15.0453,"has_fulltext":true,"cited_by_count":272,"citation_normalized_percentile":{"value":0.99451326,"is_in_top_1_percent":true,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":97,"max":100},"biblio":{"volume":"11","issue":"1","first_page":"187","last_page":"187"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10015","display_name":"Genomics and Phylogenetic Studies","score":0.9994000196456909,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},"topics":[{"id":"https://openalex.org/T10015","display_name":"Genomics and Phylogenetic Studies","score":0.9994000196456909,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},{"id":"https://openalex.org/T11791","display_name":"Microbial Community Ecology and Physiology","score":0.9994000196456909,"subfield":{"id":"https://openalex.org/subfields/2303","display_name":"Ecology"},"field":{"id":"https://openalex.org/fields/23","display_name":"Environmental Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10836","display_name":"Metabolomics and Mass Spectrometry Studies","score":0.9957000017166138,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/metagenomics","display_name":"Metagenomics","score":0.946044921875},{"id":"https://openalex.org/keywords/pyrosequencing","display_name":"Pyrosequencing","score":0.8711166381835938},{"id":"https://openalex.org/keywords/biology","display_name":"Biology","score":0.6180240511894226},{"id":"https://openalex.org/keywords/computational-biology","display_name":"Computational biology","score":0.5544506907463074},{"id":"https://openalex.org/keywords/genome","display_name":"Genome","score":0.5054953098297119},{"id":"https://openalex.org/keywords/identification","display_name":"Identification (biology)","score":0.4680843949317932},{"id":"https://openalex.org/keywords/human-microbiome-project","display_name":"Human Microbiome Project","score":0.45260635018348694},{"id":"https://openalex.org/keywords/dna-microarray","display_name":"DNA microarray","score":0.4331006407737732},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.35146820545196533},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.33634620904922485},{"id":"https://openalex.org/keywords/genetics","display_name":"Genetics","score":0.2871566116809845},{"id":"https://openalex.org/keywords/gene","display_name":"Gene","score":0.2614537477493286},{"id":"https://openalex.org/keywords/ecology","display_name":"Ecology","score":0.08555558323860168}],"concepts":[{"id":"https://openalex.org/C15151743","wikidata":"https://www.wikidata.org/wiki/Q903778","display_name":"Metagenomics","level":3,"score":0.946044921875},{"id":"https://openalex.org/C125009961","wikidata":"https://www.wikidata.org/wiki/Q2342248","display_name":"Pyrosequencing","level":3,"score":0.8711166381835938},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.6180240511894226},{"id":"https://openalex.org/C70721500","wikidata":"https://www.wikidata.org/wiki/Q177005","display_name":"Computational biology","level":1,"score":0.5544506907463074},{"id":"https://openalex.org/C141231307","wikidata":"https://www.wikidata.org/wiki/Q7020","display_name":"Genome","level":3,"score":0.5054953098297119},{"id":"https://openalex.org/C116834253","wikidata":"https://www.wikidata.org/wiki/Q2039217","display_name":"Identification (biology)","level":2,"score":0.4680843949317932},{"id":"https://openalex.org/C190944805","wikidata":"https://www.wikidata.org/wiki/Q3922751","display_name":"Human Microbiome Project","level":4,"score":0.45260635018348694},{"id":"https://openalex.org/C95371953","wikidata":"https://www.wikidata.org/wiki/Q591745","display_name":"DNA microarray","level":4,"score":0.4331006407737732},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.35146820545196533},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.33634620904922485},{"id":"https://openalex.org/C54355233","wikidata":"https://www.wikidata.org/wiki/Q7162","display_name":"Genetics","level":1,"score":0.2871566116809845},{"id":"https://openalex.org/C104317684","wikidata":"https://www.wikidata.org/wiki/Q7187","display_name":"Gene","level":2,"score":0.2614537477493286},{"id":"https://openalex.org/C18903297","wikidata":"https://www.wikidata.org/wiki/Q7150","display_name":"Ecology","level":1,"score":0.08555558323860168},{"id":"https://openalex.org/C150194340","wikidata":"https://www.wikidata.org/wiki/Q26972","display_name":"Gene expression","level":3,"score":0.0}],"mesh":[{"descriptor_ui":"D000465","descriptor_name":"Algorithms","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D000465","descriptor_name":"Algorithms","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D000465","descriptor_name":"Algorithms","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D000465","descriptor_name":"Algorithms","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D012984","descriptor_name":"Software","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D012984","descriptor_name":"Software","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D012984","descriptor_name":"Software","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D012984","descriptor_name":"Software","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D016678","descriptor_name":"Genome","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D016678","descriptor_name":"Genome","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D016678","descriptor_name":"Genome","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D016678","descriptor_name":"Genome","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D017422","descriptor_name":"Sequence Analysis, DNA","qualifier_ui":"Q000379","qualifier_name":"methods","is_major_topic":false},{"descriptor_ui":"D017422","descriptor_name":"Sequence Analysis, DNA","qualifier_ui":"Q000379","qualifier_name":"methods","is_major_topic":false},{"descriptor_ui":"D017422","descriptor_name":"Sequence Analysis, DNA","qualifier_ui":"Q000379","qualifier_name":"methods","is_major_topic":false},{"descriptor_ui":"D017422","descriptor_name":"Sequence Analysis, DNA","qualifier_ui":"Q000379","qualifier_name":"methods","is_major_topic":false},{"descriptor_ui":"D020407","descriptor_name":"Internet","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D020407","descriptor_name":"Internet","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D020407","descriptor_name":"Internet","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D020407","descriptor_name":"Internet","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D030541","descriptor_name":"Databases, Genetic","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D030541","descriptor_name":"Databases, Genetic","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D030541","descriptor_name":"Databases, Genetic","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D030541","descriptor_name":"Databases, Genetic","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D056186","descriptor_name":"Metagenomics","qualifier_ui":"Q000379","qualifier_name":"methods","is_major_topic":false},{"descriptor_ui":"D056186","descriptor_name":"Metagenomics","qualifier_ui":"Q000379","qualifier_name":"methods","is_major_topic":false},{"descriptor_ui":"D056186","descriptor_name":"Metagenomics","qualifier_ui":"Q000379","qualifier_name":"methods","is_major_topic":false},{"descriptor_ui":"D056186","descriptor_name":"Metagenomics","qualifier_ui":"Q000379","qualifier_name":"methods","is_major_topic":false}],"locations_count":4,"locations":[{"id":"doi:10.1186/1471-2105-11-187","is_oa":true,"landing_page_url":"https://doi.org/10.1186/1471-2105-11-187","pdf_url":"https://bmcbioinformatics.biomedcentral.com/counter/pdf/10.1186/1471-2105-11-187","source":{"id":"https://openalex.org/S19032547","display_name":"BMC Bioinformatics","issn_l":"1471-2105","issn":["1471-2105"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310320256","host_organization_name":"BioMed Central","host_organization_lineage":["https://openalex.org/P4310320256","https://openalex.org/P4310319965"],"host_organization_lineage_names":["BioMed Central","Springer Nature"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"BMC Bioinformatics","raw_type":"journal-article"},{"id":"pmid:20388221","is_oa":false,"landing_page_url":"https://pubmed.ncbi.nlm.nih.gov/20388221","pdf_url":null,"source":{"id":"https://openalex.org/S4306525036","display_name":"PubMed","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1299303238","host_organization_name":"National Institutes of Health","host_organization_lineage":["https://openalex.org/I1299303238"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"BMC bioinformatics","raw_type":null},{"id":"pmh:oai:doaj.org/article:4521b6f16c114f38b537925139b18aa3","is_oa":true,"landing_page_url":"https://doaj.org/article/4521b6f16c114f38b537925139b18aa3","pdf_url":null,"source":{"id":"https://openalex.org/S112646816","display_name":"SHILAP Revista de lepidopterolog\u00eda","issn_l":"0300-5267","issn":["0300-5267","2340-4078"],"is_oa":true,"is_in_doaj":true,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"journal"},"license":"cc-by-sa","license_id":"https://openalex.org/licenses/cc-by-sa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"BMC Bioinformatics, Vol 11, Iss 1, p 187 (2010)","raw_type":"article"},{"id":"pmh:oai:pubmedcentral.nih.gov:2874554","is_oa":true,"landing_page_url":"https://www.ncbi.nlm.nih.gov/pmc/articles/2874554","pdf_url":null,"source":{"id":"https://openalex.org/S2764455111","display_name":"PubMed Central","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1299303238","host_organization_name":"National Institutes of Health","host_organization_lineage":["https://openalex.org/I1299303238"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"BMC Bioinformatics","raw_type":"Text"}],"best_oa_location":{"id":"doi:10.1186/1471-2105-11-187","is_oa":true,"landing_page_url":"https://doi.org/10.1186/1471-2105-11-187","pdf_url":"https://bmcbioinformatics.biomedcentral.com/counter/pdf/10.1186/1471-2105-11-187","source":{"id":"https://openalex.org/S19032547","display_name":"BMC Bioinformatics","issn_l":"1471-2105","issn":["1471-2105"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310320256","host_organization_name":"BioMed Central","host_organization_lineage":["https://openalex.org/P4310320256","https://openalex.org/P4310319965"],"host_organization_lineage_names":["BioMed Central","Springer Nature"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"BMC Bioinformatics","raw_type":"journal-article"},"sustainable_development_goals":[{"score":0.4300000071525574,"display_name":"Life in Land","id":"https://metadata.un.org/sdg/15"}],"awards":[],"funders":[{"id":"https://openalex.org/F4320306202","display_name":"Gordon and Betty Moore Foundation","ror":"https://ror.org/006wxqw41"},{"id":"https://openalex.org/F4320332161","display_name":"National Institutes of Health","ror":"https://ror.org/01cwqze88"},{"id":"https://openalex.org/F4320337375","display_name":"National Center for Research Resources","ror":"https://ror.org/04pw6fb54"}],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W2108166985.pdf","grobid_xml":"https://content.openalex.org/works/W2108166985.grobid-xml"},"referenced_works_count":22,"referenced_works":["https://openalex.org/W1986473212","https://openalex.org/W2001031283","https://openalex.org/W2037532428","https://openalex.org/W2055666215","https://openalex.org/W2061680337","https://openalex.org/W2093830129","https://openalex.org/W2106882534","https://openalex.org/W2108211735","https://openalex.org/W2113601822","https://openalex.org/W2114200698","https://openalex.org/W2114548104","https://openalex.org/W2124637227","https://openalex.org/W2126809954","https://openalex.org/W2128114769","https://openalex.org/W2130077037","https://openalex.org/W2131342905","https://openalex.org/W2132415967","https://openalex.org/W2135083016","https://openalex.org/W2141561400","https://openalex.org/W2145336165","https://openalex.org/W2149573313","https://openalex.org/W2156125289"],"related_works":["https://openalex.org/W2108166985","https://openalex.org/W1222064643","https://openalex.org/W1543301402","https://openalex.org/W1604004112","https://openalex.org/W3023731925","https://openalex.org/W2786547608","https://openalex.org/W2086098522","https://openalex.org/W2950883119","https://openalex.org/W4283818647","https://openalex.org/W2158877985"],"abstract_inverted_index":{"Our":[0],"method":[1],"is":[2,15],"available":[3],"from":[4,23],"http://cd-hit.org":[5],"as":[6],"a":[7,11,39],"downloadable":[8],"program":[9],"and":[10],"web":[12],"server.":[13],"It":[14],"important":[16],"not":[17],"only":[18],"to":[19,28,41,49,58],"identify":[20],"the":[21,43],"duplicates":[22,47,62],"metagenomic":[24],"datasets":[25],"but":[26],"also":[27],"distinguish":[29],"whether":[30,57],"they":[31],"are":[32],"artificial":[33],"or":[34,60],"natural":[35,46],"duplicates.":[36],"We":[37],"provide":[38],"tool":[40],"estimate":[42],"number":[44],"of":[45],"according":[48],"user-defined":[50],"sample":[51],"types,":[52],"so":[53],"users":[54],"can":[55],"decide":[56],"retain":[59],"remove":[61],"in":[63],"their":[64],"projects.":[65]},"counts_by_year":[{"year":2026,"cited_by_count":2},{"year":2025,"cited_by_count":7},{"year":2024,"cited_by_count":7},{"year":2023,"cited_by_count":5},{"year":2022,"cited_by_count":7},{"year":2021,"cited_by_count":5},{"year":2020,"cited_by_count":10},{"year":2019,"cited_by_count":9},{"year":2018,"cited_by_count":13},{"year":2017,"cited_by_count":15},{"year":2016,"cited_by_count":19},{"year":2015,"cited_by_count":29},{"year":2014,"cited_by_count":33},{"year":2013,"cited_by_count":54},{"year":2012,"cited_by_count":30}],"updated_date":"2026-04-12T07:58:50.170612","created_date":"2025-10-10T00:00:00"}
