{"id":"https://openalex.org/W2163584430","doi":"https://doi.org/10.1186/1471-2105-12-333","title":"Efficient counting of k-mers in DNA sequences using a bloom filter","display_name":"Efficient counting of k-mers in DNA sequences using a bloom filter","publication_year":2011,"publication_date":"2011-08-10","ids":{"openalex":"https://openalex.org/W2163584430","doi":"https://doi.org/10.1186/1471-2105-12-333","mag":"2163584430","pmid":"https://pubmed.ncbi.nlm.nih.gov/21831268"},"language":"en","primary_location":{"id":"doi:10.1186/1471-2105-12-333","is_oa":true,"landing_page_url":"https://doi.org/10.1186/1471-2105-12-333","pdf_url":"https://bmcbioinformatics.biomedcentral.com/counter/pdf/10.1186/1471-2105-12-333","source":{"id":"https://openalex.org/S19032547","display_name":"BMC Bioinformatics","issn_l":"1471-2105","issn":["1471-2105"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310320256","host_organization_name":"BioMed Central","host_organization_lineage":["https://openalex.org/P4310320256","https://openalex.org/P4310319965"],"host_organization_lineage_names":["BioMed Central","Springer Nature"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"BMC Bioinformatics","raw_type":"journal-article"},"type":"article","indexed_in":["crossref","doaj","pubmed"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://bmcbioinformatics.biomedcentral.com/counter/pdf/10.1186/1471-2105-12-333","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5018556494","display_name":"P\u00e1ll Melsted","orcid":"https://orcid.org/0000-0002-8418-6724"},"institutions":[{"id":"https://openalex.org/I40347166","display_name":"University of Chicago","ror":"https://ror.org/024mw5h28","country_code":"US","type":"education","lineage":["https://openalex.org/I40347166"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"P\u00e1ll Melsted","raw_affiliation_strings":["Department of Human Genetics, The University of Chicago, Chicago, IL 60637, USA. pmelsted@gmail.com","Department of Human Genetics, The University of Chicago, Chicago, IL, 60637, USA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Department of Human Genetics, The University of Chicago, Chicago, IL 60637, USA. pmelsted@gmail.com","institution_ids":["https://openalex.org/I40347166"]},{"raw_affiliation_string":"Department of Human Genetics, The University of Chicago, Chicago, IL, 60637, USA","institution_ids":["https://openalex.org/I40347166"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5051388103","display_name":"Jonathan K. Pritchard","orcid":"https://orcid.org/0000-0002-8828-5236"},"institutions":[{"id":"https://openalex.org/I1344073410","display_name":"Howard Hughes Medical Institute","ror":"https://ror.org/006w34k90","country_code":"US","type":"facility","lineage":["https://openalex.org/I1344073410"]},{"id":"https://openalex.org/I40347166","display_name":"University of Chicago","ror":"https://ror.org/024mw5h28","country_code":"US","type":"education","lineage":["https://openalex.org/I40347166"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Jonathan K Pritchard","raw_affiliation_strings":["Department of Human Genetics, The University of Chicago, Chicago, IL, 60637, USA","Howard Hughes Medical Institute, The University of Chicago, Chicago, IL, 60637, USA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Department of Human Genetics, The University of Chicago, Chicago, IL, 60637, USA","institution_ids":["https://openalex.org/I40347166"]},{"raw_affiliation_string":"Howard Hughes Medical Institute, The University of Chicago, Chicago, IL, 60637, USA","institution_ids":["https://openalex.org/I1344073410","https://openalex.org/I40347166"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5018556494","https://openalex.org/A5051388103"],"corresponding_institution_ids":["https://openalex.org/I1344073410","https://openalex.org/I40347166"],"apc_list":{"value":1690,"currency":"GBP","value_usd":2072},"apc_paid":{"value":1690,"currency":"GBP","value_usd":2072},"fwci":5.9881,"has_fulltext":true,"cited_by_count":301,"citation_normalized_percentile":{"value":0.9726804,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":98,"max":100},"biblio":{"volume":"12","issue":"1","first_page":"333","last_page":"333"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10015","display_name":"Genomics and Phylogenetic Studies","score":0.603600025177002,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},"topics":[{"id":"https://openalex.org/T10015","display_name":"Genomics and Phylogenetic Studies","score":0.603600025177002,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},{"id":"https://openalex.org/T13664","display_name":"Genome Rearrangement Algorithms","score":0.1738000065088272,"subfield":{"id":"https://openalex.org/subfields/1311","display_name":"Genetics"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},{"id":"https://openalex.org/T11269","display_name":"Algorithms and Data Compression","score":0.1362999975681305,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/bloom-filter","display_name":"Bloom filter","score":0.8620848655700684},{"id":"https://openalex.org/keywords/dna-microarray","display_name":"DNA microarray","score":0.6929742097854614},{"id":"https://openalex.org/keywords/computational-biology","display_name":"Computational biology","score":0.5745866298675537},{"id":"https://openalex.org/keywords/bloom","display_name":"Bloom","score":0.4794803559780121},{"id":"https://openalex.org/keywords/dna-sequencing","display_name":"DNA sequencing","score":0.45585593581199646},{"id":"https://openalex.org/keywords/dna","display_name":"DNA","score":0.4471893906593323},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.43933433294296265},{"id":"https://openalex.org/keywords/biology","display_name":"Biology","score":0.4372015595436096},{"id":"https://openalex.org/keywords/genetics","display_name":"Genetics","score":0.3558822274208069},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.19177722930908203},{"id":"https://openalex.org/keywords/gene","display_name":"Gene","score":0.1687610149383545}],"concepts":[{"id":"https://openalex.org/C147224247","wikidata":"https://www.wikidata.org/wiki/Q885373","display_name":"Bloom filter","level":2,"score":0.8620848655700684},{"id":"https://openalex.org/C95371953","wikidata":"https://www.wikidata.org/wiki/Q591745","display_name":"DNA microarray","level":4,"score":0.6929742097854614},{"id":"https://openalex.org/C70721500","wikidata":"https://www.wikidata.org/wiki/Q177005","display_name":"Computational biology","level":1,"score":0.5745866298675537},{"id":"https://openalex.org/C155567681","wikidata":"https://www.wikidata.org/wiki/Q2987425","display_name":"Bloom","level":2,"score":0.4794803559780121},{"id":"https://openalex.org/C51679486","wikidata":"https://www.wikidata.org/wiki/Q380546","display_name":"DNA sequencing","level":3,"score":0.45585593581199646},{"id":"https://openalex.org/C552990157","wikidata":"https://www.wikidata.org/wiki/Q7430","display_name":"DNA","level":2,"score":0.4471893906593323},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.43933433294296265},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.4372015595436096},{"id":"https://openalex.org/C54355233","wikidata":"https://www.wikidata.org/wiki/Q7162","display_name":"Genetics","level":1,"score":0.3558822274208069},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.19177722930908203},{"id":"https://openalex.org/C104317684","wikidata":"https://www.wikidata.org/wiki/Q7187","display_name":"Gene","level":2,"score":0.1687610149383545},{"id":"https://openalex.org/C150194340","wikidata":"https://www.wikidata.org/wiki/Q26972","display_name":"Gene expression","level":3,"score":0.0},{"id":"https://openalex.org/C18903297","wikidata":"https://www.wikidata.org/wiki/Q7150","display_name":"Ecology","level":1,"score":0.0}],"mesh":[{"descriptor_ui":"D000465","descriptor_name":"Algorithms","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D000465","descriptor_name":"Algorithms","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D000465","descriptor_name":"Algorithms","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D003201","descriptor_name":"Computers","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D003201","descriptor_name":"Computers","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D003201","descriptor_name":"Computers","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D006801","descriptor_name":"Humans","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D006801","descriptor_name":"Humans","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D006801","descriptor_name":"Humans","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D011336","descriptor_name":"Probability","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D011336","descriptor_name":"Probability","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D011336","descriptor_name":"Probability","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D012984","descriptor_name":"Software","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D012984","descriptor_name":"Software","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D012984","descriptor_name":"Software","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D017422","descriptor_name":"Sequence Analysis, DNA","qualifier_ui":"Q000295","qualifier_name":"instrumentation","is_major_topic":false},{"descriptor_ui":"D017422","descriptor_name":"Sequence Analysis, DNA","qualifier_ui":"Q000295","qualifier_name":"instrumentation","is_major_topic":false},{"descriptor_ui":"D017422","descriptor_name":"Sequence Analysis, DNA","qualifier_ui":"Q000295","qualifier_name":"instrumentation","is_major_topic":false},{"descriptor_ui":"D017422","descriptor_name":"Sequence Analysis, DNA","qualifier_ui":"Q000379","qualifier_name":"methods","is_major_topic":false},{"descriptor_ui":"D017422","descriptor_name":"Sequence Analysis, DNA","qualifier_ui":"Q000379","qualifier_name":"methods","is_major_topic":false},{"descriptor_ui":"D017422","descriptor_name":"Sequence Analysis, DNA","qualifier_ui":"Q000379","qualifier_name":"methods","is_major_topic":false},{"descriptor_ui":"D019295","descriptor_name":"Computational Biology","qualifier_ui":"Q000295","qualifier_name":"instrumentation","is_major_topic":false},{"descriptor_ui":"D019295","descriptor_name":"Computational Biology","qualifier_ui":"Q000295","qualifier_name":"instrumentation","is_major_topic":false},{"descriptor_ui":"D019295","descriptor_name":"Computational Biology","qualifier_ui":"Q000295","qualifier_name":"instrumentation","is_major_topic":false},{"descriptor_ui":"D019295","descriptor_name":"Computational Biology","qualifier_ui":"Q000379","qualifier_name":"methods","is_major_topic":false},{"descriptor_ui":"D019295","descriptor_name":"Computational Biology","qualifier_ui":"Q000379","qualifier_name":"methods","is_major_topic":false},{"descriptor_ui":"D019295","descriptor_name":"Computational Biology","qualifier_ui":"Q000379","qualifier_name":"methods","is_major_topic":false},{"descriptor_ui":"D060148","descriptor_name":"HapMap Project","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D060148","descriptor_name":"HapMap Project","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D060148","descriptor_name":"HapMap Project","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false}],"locations_count":4,"locations":[{"id":"doi:10.1186/1471-2105-12-333","is_oa":true,"landing_page_url":"https://doi.org/10.1186/1471-2105-12-333","pdf_url":"https://bmcbioinformatics.biomedcentral.com/counter/pdf/10.1186/1471-2105-12-333","source":{"id":"https://openalex.org/S19032547","display_name":"BMC Bioinformatics","issn_l":"1471-2105","issn":["1471-2105"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310320256","host_organization_name":"BioMed Central","host_organization_lineage":["https://openalex.org/P4310320256","https://openalex.org/P4310319965"],"host_organization_lineage_names":["BioMed Central","Springer Nature"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"BMC Bioinformatics","raw_type":"journal-article"},{"id":"pmid:21831268","is_oa":false,"landing_page_url":"https://pubmed.ncbi.nlm.nih.gov/21831268","pdf_url":null,"source":{"id":"https://openalex.org/S4306525036","display_name":"PubMed","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1299303238","host_organization_name":"National Institutes of Health","host_organization_lineage":["https://openalex.org/I1299303238"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"BMC bioinformatics","raw_type":null},{"id":"pmh:oai:doaj.org/article:be88730b1f314a4791d46ce3b0793eaa","is_oa":false,"landing_page_url":"https://doaj.org/article/be88730b1f314a4791d46ce3b0793eaa","pdf_url":null,"source":{"id":"https://openalex.org/S4306401280","display_name":"DOAJ (DOAJ: Directory of Open Access Journals)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"BMC Bioinformatics, Vol 12, Iss 1, p 333 (2011)","raw_type":"article"},{"id":"pmh:oai:europepmc.org:2173818","is_oa":true,"landing_page_url":"https://www.ncbi.nlm.nih.gov/pmc/articles/3166945","pdf_url":null,"source":{"id":"https://openalex.org/S4306400806","display_name":"Europe PMC (PubMed Central)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1303153112","host_organization_name":"European Bioinformatics Institute","host_organization_lineage":["https://openalex.org/I1303153112"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Text"}],"best_oa_location":{"id":"doi:10.1186/1471-2105-12-333","is_oa":true,"landing_page_url":"https://doi.org/10.1186/1471-2105-12-333","pdf_url":"https://bmcbioinformatics.biomedcentral.com/counter/pdf/10.1186/1471-2105-12-333","source":{"id":"https://openalex.org/S19032547","display_name":"BMC Bioinformatics","issn_l":"1471-2105","issn":["1471-2105"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310320256","host_organization_name":"BioMed Central","host_organization_lineage":["https://openalex.org/P4310320256","https://openalex.org/P4310319965"],"host_organization_lineage_names":["BioMed Central","Springer Nature"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"BMC Bioinformatics","raw_type":"journal-article"},"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G2034990753","display_name":null,"funder_award_id":"MH084703","funder_id":"https://openalex.org/F4320332161","funder_display_name":"National Institutes of Health"}],"funders":[{"id":"https://openalex.org/F4320306082","display_name":"Howard Hughes Medical Institute","ror":"https://ror.org/006w34k90"},{"id":"https://openalex.org/F4320332161","display_name":"National Institutes of Health","ror":"https://ror.org/01cwqze88"}],"has_content":{"pdf":true,"grobid_xml":true},"content_urls":{"pdf":"https://content.openalex.org/works/W2163584430.pdf","grobid_xml":"https://content.openalex.org/works/W2163584430.grobid-xml"},"referenced_works_count":25,"referenced_works":["https://openalex.org/W1606270358","https://openalex.org/W1785933978","https://openalex.org/W1966822396","https://openalex.org/W1993284846","https://openalex.org/W2022986961","https://openalex.org/W2046594687","https://openalex.org/W2057253402","https://openalex.org/W2081869611","https://openalex.org/W2086072057","https://openalex.org/W2096128575","https://openalex.org/W2123845384","https://openalex.org/W2126540423","https://openalex.org/W2127174073","https://openalex.org/W2133531097","https://openalex.org/W2133956160","https://openalex.org/W2136651963","https://openalex.org/W2142749416","https://openalex.org/W2156658871","https://openalex.org/W2159946869","https://openalex.org/W2160969485","https://openalex.org/W2161546116","https://openalex.org/W2169773990","https://openalex.org/W2171777347","https://openalex.org/W2885727518","https://openalex.org/W3198160809"],"related_works":["https://openalex.org/W2086572746","https://openalex.org/W2604468458","https://openalex.org/W2157216338","https://openalex.org/W1662107788","https://openalex.org/W121740227","https://openalex.org/W2051000928","https://openalex.org/W4293466821","https://openalex.org/W2785871494","https://openalex.org/W4298105090","https://openalex.org/W2789524206"],"abstract_inverted_index":{"BACKGROUND:":[0],"Counting":[1],"k-mers":[2,41,75,94,116,145,207],"(substrings":[3],"of":[4,15,33,54,103,167],"length":[5],"k":[6],"in":[7,18,38,42,89,122,147,181,191,208,223],"DNA":[8,124],"sequence":[9,34,45,125,209],"data)":[10],"is":[11,221,226,230],"an":[12],"essential":[13],"component":[14],"many":[16,98],"methods":[17],"bioinformatics,":[19],"including":[20],"for":[21,26,30,97,200,217,232],"genome":[22],"and":[23,29,80,225],"transcriptome":[24],"assembly,":[25],"metagenomic":[27],"sequencing,":[28],"error":[31,104],"correction":[32],"reads.":[35],"Although":[36],"simple":[37],"principle,":[39],"counting":[40,206],"large":[43,62],"modern":[44],"data":[46,59,126,138,162,173,210],"sets":[47],"can":[48],"easily":[49],"overwhelm":[50],"the":[51,67,90,115,143,161],"memory":[52,148,152,182,198],"capacity":[53,69],"standard":[55],"computers.":[56],"In":[57],"current":[58,186],"sets,":[60,174],"a":[61,86,109,123,133,136,157],"fraction-often":[63],"more":[64,119],"than":[65,120],"50%-of":[66],"storage":[68],"may":[70,196],"be":[71],"spent":[72],"on":[73],"storing":[74],"that":[76,112,117,140,203],"contain":[77],"sequencing":[78],"errors":[79],"which":[81],"are":[82,95],"typically":[83],"observed":[84,144],"only":[85],"single":[87],"time":[88],"data.":[91],"These":[92],"singleton":[93],"uninformative":[96],"algorithms":[99],"without":[100],"some":[101],"kind":[102],"correction.":[105],"RESULTS:":[106],"We":[107,154],"present":[108],"new":[110],"method":[111,129],"identifies":[113],"all":[114,142,168],"occur":[118],"once":[121],"set.":[127],"Our":[128],"does":[130],"this":[131,218],"using":[132],"Bloom":[134],"filter,":[135],"probabilistic":[137],"structure":[139],"stores":[141],"implicitly":[146],"with":[149,188,211],"greatly":[150],"reduced":[151],"requirements.":[153],"then":[155],"make":[156],"second":[158],"sweep":[159],"through":[160],"to":[163,178,185],"provide":[164],"exact":[165],"counts":[166],"nonunique":[169],"k-mers.":[170],"For":[171],"example":[172],"we":[175],"report":[176],"up":[177],"50%":[179],"savings":[180],"usage":[183],"compared":[184],"software,":[187],"modest":[189],"costs":[190],"computational":[192],"speed.":[193],"This":[194],"approach":[195],"reduce":[197],"requirements":[199],"any":[201],"algorithm":[202],"starts":[204],"by":[205],"errors.":[212],"CONCLUSIONS:":[213],"A":[214],"reference":[215],"implementation":[216],"methodology,":[219],"BFCounter,":[220],"written":[222],"C++":[224],"GPL":[227],"licensed.":[228],"It":[229],"available":[231],"free":[233],"download":[234],"at":[235],"http://pritch.bsd.uchicago.edu/bfcounter.html.":[236]},"counts_by_year":[{"year":2026,"cited_by_count":4},{"year":2025,"cited_by_count":9},{"year":2024,"cited_by_count":17},{"year":2023,"cited_by_count":13},{"year":2022,"cited_by_count":28},{"year":2021,"cited_by_count":27},{"year":2020,"cited_by_count":20},{"year":2019,"cited_by_count":29},{"year":2018,"cited_by_count":31},{"year":2017,"cited_by_count":36},{"year":2016,"cited_by_count":27},{"year":2015,"cited_by_count":16},{"year":2014,"cited_by_count":24},{"year":2013,"cited_by_count":15},{"year":2012,"cited_by_count":5}],"updated_date":"2026-06-19T15:47:20.252518","created_date":"2025-10-10T00:00:00"}
