{"id":"https://openalex.org/W2763390627","doi":"https://doi.org/10.1371/journal.pcbi.1005777","title":"Designing small universal k-mer hitting sets for improved analysis of high-throughput sequencing","display_name":"Designing small universal k-mer hitting sets for improved analysis of high-throughput sequencing","publication_year":2017,"publication_date":"2017-10-02","ids":{"openalex":"https://openalex.org/W2763390627","doi":"https://doi.org/10.1371/journal.pcbi.1005777","mag":"2763390627","pmid":"https://pubmed.ncbi.nlm.nih.gov/28968408"},"language":"en","primary_location":{"id":"doi:10.1371/journal.pcbi.1005777","is_oa":true,"landing_page_url":"https://doi.org/10.1371/journal.pcbi.1005777","pdf_url":"https://journals.plos.org/ploscompbiol/article/file?id=10.1371/journal.pcbi.1005777&type=printable","source":{"id":"https://openalex.org/S86033158","display_name":"PLoS Computational Biology","issn_l":"1553-734X","issn":["1553-734X","1553-7358"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310315706","host_organization_name":"Public Library of Science","host_organization_lineage":["https://openalex.org/P4310315706"],"host_organization_lineage_names":["Public Library of Science"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"PLOS Computational Biology","raw_type":"journal-article"},"type":"article","indexed_in":["crossref","doaj","pubmed"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://journals.plos.org/ploscompbiol/article/file?id=10.1371/journal.pcbi.1005777&type=printable","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5037786810","display_name":"Yaron Orenstein","orcid":"https://orcid.org/0000-0002-3583-3112"},"institutions":[{"id":"https://openalex.org/I63966007","display_name":"Massachusetts Institute of Technology","ror":"https://ror.org/042nb2s44","country_code":"US","type":"education","lineage":["https://openalex.org/I63966007"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Yaron Orenstein","raw_affiliation_strings":["Computer Science and Artificial Intelligence Laboratory, MIT, Cambridge, Massasschusetts, United States of America"],"affiliations":[{"raw_affiliation_string":"Computer Science and Artificial Intelligence Laboratory, MIT, Cambridge, Massasschusetts, United States of America","institution_ids":["https://openalex.org/I63966007"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5008335823","display_name":"David Pellow","orcid":"https://orcid.org/0000-0002-6296-5209"},"institutions":[{"id":"https://openalex.org/I16391192","display_name":"Tel Aviv University","ror":"https://ror.org/04mhzgx49","country_code":"IL","type":"education","lineage":["https://openalex.org/I16391192"]}],"countries":["IL"],"is_corresponding":false,"raw_author_name":"David Pellow","raw_affiliation_strings":["Blavatnik School of Computer Science, Tel-Aviv University, Tel-Aviv, Israel"],"affiliations":[{"raw_affiliation_string":"Blavatnik School of Computer Science, Tel-Aviv University, Tel-Aviv, Israel","institution_ids":["https://openalex.org/I16391192"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5009407234","display_name":"Guillaume Mar\u00e7ais","orcid":null},"institutions":[{"id":"https://openalex.org/I74973139","display_name":"Carnegie Mellon University","ror":"https://ror.org/05x2bcf33","country_code":"US","type":"education","lineage":["https://openalex.org/I74973139"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Guillaume Mar\u00e7ais","raw_affiliation_strings":["Computational Biology Department, School of Computer Science, Carnegie Mellon University, Pittsburgh, Pennsylvania, United States of America"],"affiliations":[{"raw_affiliation_string":"Computational Biology Department, School of Computer Science, Carnegie Mellon University, Pittsburgh, Pennsylvania, United States of America","institution_ids":["https://openalex.org/I74973139"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5038806020","display_name":"Ron Shamir","orcid":"https://orcid.org/0000-0003-1889-9870"},"institutions":[{"id":"https://openalex.org/I16391192","display_name":"Tel Aviv University","ror":"https://ror.org/04mhzgx49","country_code":"IL","type":"education","lineage":["https://openalex.org/I16391192"]}],"countries":["IL"],"is_corresponding":true,"raw_author_name":"Ron Shamir","raw_affiliation_strings":["Blavatnik School of Computer Science, Tel-Aviv University, Tel-Aviv, Israel"],"affiliations":[{"raw_affiliation_string":"Blavatnik School of Computer Science, Tel-Aviv University, Tel-Aviv, Israel","institution_ids":["https://openalex.org/I16391192"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5113653378","display_name":"Carl Kingsford","orcid":null},"institutions":[{"id":"https://openalex.org/I74973139","display_name":"Carnegie Mellon University","ror":"https://ror.org/05x2bcf33","country_code":"US","type":"education","lineage":["https://openalex.org/I74973139"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Carl Kingsford","raw_affiliation_strings":["Computational Biology Department, School of Computer Science, Carnegie Mellon University, Pittsburgh, Pennsylvania, United States of America"],"affiliations":[{"raw_affiliation_string":"Computational Biology Department, School of Computer Science, Carnegie Mellon University, Pittsburgh, Pennsylvania, United States of America","institution_ids":["https://openalex.org/I74973139"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5038806020","https://openalex.org/A5113653378"],"corresponding_institution_ids":["https://openalex.org/I16391192","https://openalex.org/I74973139"],"apc_list":{"value":2655,"currency":"USD","value_usd":2655},"apc_paid":{"value":2655,"currency":"USD","value_usd":2655},"fwci":4.1573,"has_fulltext":true,"cited_by_count":62,"citation_normalized_percentile":{"value":0.95273131,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":96,"max":99},"biblio":{"volume":"13","issue":"10","first_page":"e1005777","last_page":"e1005777"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11269","display_name":"Algorithms and Data Compression","score":0.9958000183105469,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11269","display_name":"Algorithms and Data Compression","score":0.9958000183105469,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10207","display_name":"Advanced biosensing and bioanalysis techniques","score":0.9757999777793884,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},{"id":"https://openalex.org/T10015","display_name":"Genomics and Phylogenetic Studies","score":0.9641000032424927,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/bloom-filter","display_name":"Bloom filter","score":0.8075792789459229},{"id":"https://openalex.org/keywords/heuristics","display_name":"Heuristics","score":0.7180832624435425},{"id":"https://openalex.org/keywords/set","display_name":"Set (abstract data type)","score":0.6584005355834961},{"id":"https://openalex.org/keywords/sequence","display_name":"Sequence (biology)","score":0.628322958946228},{"id":"https://openalex.org/keywords/heuristic","display_name":"Heuristic","score":0.6221217513084412},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6163751482963562},{"id":"https://openalex.org/keywords/hash-function","display_name":"Hash function","score":0.5764737129211426},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.5226666331291199},{"id":"https://openalex.org/keywords/suffix","display_name":"Suffix","score":0.49644285440444946},{"id":"https://openalex.org/keywords/speedup","display_name":"Speedup","score":0.4933442175388336},{"id":"https://openalex.org/keywords/throughput","display_name":"Throughput","score":0.4489431381225586},{"id":"https://openalex.org/keywords/suffix-array","display_name":"Suffix array","score":0.42399781942367554},{"id":"https://openalex.org/keywords/theoretical-computer-science","display_name":"Theoretical computer science","score":0.3592381477355957},{"id":"https://openalex.org/keywords/data-structure","display_name":"Data structure","score":0.3041149377822876},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.26464223861694336},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.21420139074325562},{"id":"https://openalex.org/keywords/biology","display_name":"Biology","score":0.16215020418167114},{"id":"https://openalex.org/keywords/genetics","display_name":"Genetics","score":0.11889144778251648}],"concepts":[{"id":"https://openalex.org/C147224247","wikidata":"https://www.wikidata.org/wiki/Q885373","display_name":"Bloom filter","level":2,"score":0.8075792789459229},{"id":"https://openalex.org/C127705205","wikidata":"https://www.wikidata.org/wiki/Q5748245","display_name":"Heuristics","level":2,"score":0.7180832624435425},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.6584005355834961},{"id":"https://openalex.org/C2778112365","wikidata":"https://www.wikidata.org/wiki/Q3511065","display_name":"Sequence (biology)","level":2,"score":0.628322958946228},{"id":"https://openalex.org/C173801870","wikidata":"https://www.wikidata.org/wiki/Q201413","display_name":"Heuristic","level":2,"score":0.6221217513084412},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6163751482963562},{"id":"https://openalex.org/C99138194","wikidata":"https://www.wikidata.org/wiki/Q183427","display_name":"Hash function","level":2,"score":0.5764737129211426},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.5226666331291199},{"id":"https://openalex.org/C2779804580","wikidata":"https://www.wikidata.org/wiki/Q102047","display_name":"Suffix","level":2,"score":0.49644285440444946},{"id":"https://openalex.org/C68339613","wikidata":"https://www.wikidata.org/wiki/Q1549489","display_name":"Speedup","level":2,"score":0.4933442175388336},{"id":"https://openalex.org/C157764524","wikidata":"https://www.wikidata.org/wiki/Q1383412","display_name":"Throughput","level":3,"score":0.4489431381225586},{"id":"https://openalex.org/C2779259728","wikidata":"https://www.wikidata.org/wiki/Q281472","display_name":"Suffix array","level":3,"score":0.42399781942367554},{"id":"https://openalex.org/C80444323","wikidata":"https://www.wikidata.org/wiki/Q2878974","display_name":"Theoretical computer science","level":1,"score":0.3592381477355957},{"id":"https://openalex.org/C162319229","wikidata":"https://www.wikidata.org/wiki/Q175263","display_name":"Data structure","level":2,"score":0.3041149377822876},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.26464223861694336},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.21420139074325562},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.16215020418167114},{"id":"https://openalex.org/C54355233","wikidata":"https://www.wikidata.org/wiki/Q7162","display_name":"Genetics","level":1,"score":0.11889144778251648},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0},{"id":"https://openalex.org/C38652104","wikidata":"https://www.wikidata.org/wiki/Q3510521","display_name":"Computer security","level":1,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.0},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0},{"id":"https://openalex.org/C555944384","wikidata":"https://www.wikidata.org/wiki/Q249","display_name":"Wireless","level":2,"score":0.0},{"id":"https://openalex.org/C76155785","wikidata":"https://www.wikidata.org/wiki/Q418","display_name":"Telecommunications","level":1,"score":0.0}],"mesh":[{"descriptor_ui":"D000067508","descriptor_name":"Computer Heuristics","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D000067508","descriptor_name":"Computer Heuristics","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D000067508","descriptor_name":"Computer Heuristics","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D000465","descriptor_name":"Algorithms","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D000465","descriptor_name":"Algorithms","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D000465","descriptor_name":"Algorithms","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D000818","descriptor_name":"Animals","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D000818","descriptor_name":"Animals","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D000818","descriptor_name":"Animals","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D006801","descriptor_name":"Humans","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D006801","descriptor_name":"Humans","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D006801","descriptor_name":"Humans","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D012984","descriptor_name":"Software","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D012984","descriptor_name":"Software","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D012984","descriptor_name":"Software","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D015894","descriptor_name":"Genome, Human","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D015894","descriptor_name":"Genome, Human","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D015894","descriptor_name":"Genome, Human","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D016680","descriptor_name":"Genome, Bacterial","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D016680","descriptor_name":"Genome, Bacterial","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D016680","descriptor_name":"Genome, Bacterial","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D017173","descriptor_name":"Caenorhabditis elegans","qualifier_ui":"Q000235","qualifier_name":"genetics","is_major_topic":false},{"descriptor_ui":"D017173","descriptor_name":"Caenorhabditis elegans","qualifier_ui":"Q000235","qualifier_name":"genetics","is_major_topic":false},{"descriptor_ui":"D017173","descriptor_name":"Caenorhabditis elegans","qualifier_ui":"Q000235","qualifier_name":"genetics","is_major_topic":false},{"descriptor_ui":"D017422","descriptor_name":"Sequence Analysis, DNA","qualifier_ui":"Q000379","qualifier_name":"methods","is_major_topic":false},{"descriptor_ui":"D017422","descriptor_name":"Sequence Analysis, DNA","qualifier_ui":"Q000379","qualifier_name":"methods","is_major_topic":false},{"descriptor_ui":"D017422","descriptor_name":"Sequence Analysis, DNA","qualifier_ui":"Q000379","qualifier_name":"methods","is_major_topic":false},{"descriptor_ui":"D019295","descriptor_name":"Computational Biology","qualifier_ui":"Q000379","qualifier_name":"methods","is_major_topic":false},{"descriptor_ui":"D019295","descriptor_name":"Computational Biology","qualifier_ui":"Q000379","qualifier_name":"methods","is_major_topic":false},{"descriptor_ui":"D019295","descriptor_name":"Computational Biology","qualifier_ui":"Q000379","qualifier_name":"methods","is_major_topic":false},{"descriptor_ui":"D059014","descriptor_name":"High-Throughput Nucleotide Sequencing","qualifier_ui":"Q000379","qualifier_name":"methods","is_major_topic":false},{"descriptor_ui":"D059014","descriptor_name":"High-Throughput Nucleotide Sequencing","qualifier_ui":"Q000379","qualifier_name":"methods","is_major_topic":false},{"descriptor_ui":"D059014","descriptor_name":"High-Throughput Nucleotide Sequencing","qualifier_ui":"Q000379","qualifier_name":"methods","is_major_topic":false}],"locations_count":6,"locations":[{"id":"doi:10.1371/journal.pcbi.1005777","is_oa":true,"landing_page_url":"https://doi.org/10.1371/journal.pcbi.1005777","pdf_url":"https://journals.plos.org/ploscompbiol/article/file?id=10.1371/journal.pcbi.1005777&type=printable","source":{"id":"https://openalex.org/S86033158","display_name":"PLoS Computational Biology","issn_l":"1553-734X","issn":["1553-734X","1553-7358"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310315706","host_organization_name":"Public Library of Science","host_organization_lineage":["https://openalex.org/P4310315706"],"host_organization_lineage_names":["Public Library of Science"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"PLOS Computational Biology","raw_type":"journal-article"},{"id":"pmid:28968408","is_oa":false,"landing_page_url":"https://pubmed.ncbi.nlm.nih.gov/28968408","pdf_url":null,"source":{"id":"https://openalex.org/S4306525036","display_name":"PubMed","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1299303238","host_organization_name":"National Institutes of Health","host_organization_lineage":["https://openalex.org/I1299303238"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"PLoS computational biology","raw_type":null},{"id":"pmh:oai:RePEc:plo:pcbi00:1005777","is_oa":false,"landing_page_url":"https://journals.plos.org/ploscompbiol/article?id=10.1371/journal.pcbi.1005777","pdf_url":null,"source":{"id":"https://openalex.org/S4306401271","display_name":"RePEc: Research Papers in Economics","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I77793887","host_organization_name":"Federal Reserve Bank of St. Louis","host_organization_lineage":["https://openalex.org/I77793887"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},{"id":"pmh:oai:doaj.org/article:2bcc38fa38294aeab76d492cda7ef0e7","is_oa":true,"landing_page_url":"https://doaj.org/article/2bcc38fa38294aeab76d492cda7ef0e7","pdf_url":null,"source":{"id":"https://openalex.org/S112646816","display_name":"SHILAP Revista de lepidopterolog\u00eda","issn_l":"0300-5267","issn":["0300-5267","2340-4078"],"is_oa":true,"is_in_doaj":true,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"journal"},"license":"cc-by-sa","license_id":"https://openalex.org/licenses/cc-by-sa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"PLoS Computational Biology, Vol 13, Iss 10, p e1005777 (2017)","raw_type":"article"},{"id":"pmh:oai:europepmc.org:4576092","is_oa":true,"landing_page_url":"https://www.ncbi.nlm.nih.gov/pmc/articles/5645146","pdf_url":null,"source":{"id":"https://openalex.org/S4306400806","display_name":"Europe PMC (PubMed Central)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1303153112","host_organization_name":"European Bioinformatics Institute","host_organization_lineage":["https://openalex.org/I1303153112"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Text"},{"id":"pmh:oai:figshare.com:article/5459854","is_oa":true,"landing_page_url":"https://figshare.com/articles/dataset/Designing_small_universal_i_k_i_-mer_hitting_sets_for_improved_analysis_of_high-throughput_sequencing/5459854","pdf_url":null,"source":{"id":"https://openalex.org/S4377196282","display_name":"Figshare","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I4210132348","host_organization_name":"Figshare (United Kingdom)","host_organization_lineage":["https://openalex.org/I4210132348"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"Dataset"}],"best_oa_location":{"id":"doi:10.1371/journal.pcbi.1005777","is_oa":true,"landing_page_url":"https://doi.org/10.1371/journal.pcbi.1005777","pdf_url":"https://journals.plos.org/ploscompbiol/article/file?id=10.1371/journal.pcbi.1005777&type=printable","source":{"id":"https://openalex.org/S86033158","display_name":"PLoS Computational Biology","issn_l":"1553-734X","issn":["1553-734X","1553-7358"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310315706","host_organization_name":"Public Library of Science","host_organization_lineage":["https://openalex.org/P4310315706"],"host_organization_lineage_names":["Public Library of Science"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"PLOS Computational Biology","raw_type":"journal-article"},"sustainable_development_goals":[{"display_name":"Industry, innovation and infrastructure","score":0.41999998688697815,"id":"https://metadata.un.org/sdg/9"}],"awards":[{"id":"https://openalex.org/G101112439","display_name":null,"funder_award_id":"Fellow","funder_id":"https://openalex.org/F4320306151","funder_display_name":"Alfred P. Sloan Foundation"},{"id":"https://openalex.org/G1231421488","display_name":null,"funder_award_id":"under","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G1325684988","display_name":null,"funder_award_id":"CCF-1256087","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"},{"id":"https://openalex.org/G2735894066","display_name":null,"funder_award_id":"ISF-NSFC joint program 2015-2018","funder_id":"https://openalex.org/F4320322252","funder_display_name":"Israel Science Foundation"},{"id":"https://openalex.org/G2933222473","display_name":"AF: Small: Multiscale Spectral Signatures for Local and Multi-objective Biological Network Alignment","funder_award_id":"1319998","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"},{"id":"https://openalex.org/G3068176952","display_name":null,"funder_award_id":"R01HG007104","funder_id":"https://openalex.org/F4320332161","funder_display_name":"National Institutes of Health"},{"id":"https://openalex.org/G3317480652","display_name":null,"funder_award_id":"Science","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G3355402586","display_name":null,"funder_award_id":"GBMF4554","funder_id":"https://openalex.org/F4320306202","funder_display_name":"Gordon and Betty Moore Foundation"},{"id":"https://openalex.org/G3641969089","display_name":null,"funder_award_id":"1256087","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"},{"id":"https://openalex.org/G4366449258","display_name":null,"funder_award_id":"2015-2018","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G4772956920","display_name":null,"funder_award_id":"ISF-NSFC joint program","funder_id":"https://openalex.org/F4320322252","funder_display_name":"Israel Science Foundation"},{"id":"https://openalex.org/G5930628116","display_name":null,"funder_award_id":"2015-2018","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"},{"id":"https://openalex.org/G6569375368","display_name":null,"funder_award_id":"HG007104","funder_id":"https://openalex.org/F4320332161","funder_display_name":"National Institutes of Health"},{"id":"https://openalex.org/G7315415636","display_name":null,"funder_award_id":"Grant","funder_id":"https://openalex.org/F4320322252","funder_display_name":"Israel Science Foundation"},{"id":"https://openalex.org/G7958957864","display_name":null,"funder_award_id":"CCF-1256087, CCF-1319998","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"},{"id":"https://openalex.org/G848032724","display_name":null,"funder_award_id":"Science","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"},{"id":"https://openalex.org/G8925788920","display_name":null,"funder_award_id":"CCF-1319998","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"}],"funders":[{"id":"https://openalex.org/F4320306076","display_name":"National Science Foundation","ror":"https://ror.org/021nxhr62"},{"id":"https://openalex.org/F4320306151","display_name":"Alfred P. Sloan Foundation","ror":"https://ror.org/052csg198"},{"id":"https://openalex.org/F4320306202","display_name":"Gordon and Betty Moore Foundation","ror":"https://ror.org/006wxqw41"},{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"},{"id":"https://openalex.org/F4320322252","display_name":"Israel Science Foundation","ror":"https://ror.org/04sazxf24"},{"id":"https://openalex.org/F4320332161","display_name":"National Institutes of Health","ror":"https://ror.org/01cwqze88"},{"id":"https://openalex.org/F4320334164","display_name":"Simons Institute for the Theory of Computing, University of California Berkeley","ror":null}],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W2763390627.pdf","grobid_xml":"https://content.openalex.org/works/W2763390627.grobid-xml"},"referenced_works_count":20,"referenced_works":["https://openalex.org/W1548907175","https://openalex.org/W1980664567","https://openalex.org/W2009119978","https://openalex.org/W2025583382","https://openalex.org/W2060884261","https://openalex.org/W2079944089","https://openalex.org/W2102841127","https://openalex.org/W2111044311","https://openalex.org/W2111071896","https://openalex.org/W2111295912","https://openalex.org/W2122316710","https://openalex.org/W2125266506","https://openalex.org/W2126353995","https://openalex.org/W2144560237","https://openalex.org/W2157054705","https://openalex.org/W2159954944","https://openalex.org/W2195724570","https://openalex.org/W2266239166","https://openalex.org/W2487384794","https://openalex.org/W2560209686"],"related_works":["https://openalex.org/W2227807207","https://openalex.org/W2370014100","https://openalex.org/W3137108924","https://openalex.org/W1852009617","https://openalex.org/W4287264924","https://openalex.org/W2042819006","https://openalex.org/W2170165007","https://openalex.org/W1979110442","https://openalex.org/W2156660460","https://openalex.org/W2003608043"],"abstract_inverted_index":{"With":[0],"the":[1,100,126,148,207,212],"rapidly":[2],"increasing":[3],"volume":[4],"of":[5,82,142,150,181,206],"deep":[6],"sequencing":[7],"data,":[8],"more":[9],"efficient":[10,131],"algorithms":[11],"and":[12,44,67,73,124,138,163,183,185,220,228],"data":[13],"structures":[14],"are":[15,18,167,223],"needed.":[16],"Minimizers":[17],"a":[19,80,85,97,104,110,151,171],"central":[20],"recent":[21],"paradigm":[22,57],"that":[23,58,79,157,166,193],"has":[24],"improved":[25],"various":[26,179],"sequence":[27,50,94],"analysis":[28],"tasks,":[29],"including":[30],"hashing":[31],"for":[32,40,47,125,136,178],"faster":[33],"read":[34],"overlap":[35],"detection,":[36],"sparse":[37],"suffix":[38],"arrays":[39],"creating":[41],"smaller":[42],"indexes,":[43],"Bloom":[45],"filters":[46],"speeding":[48],"up":[49],"search.":[51],"Here,":[52],"we":[53,77,128],"propose":[54,129],"an":[55],"alternative":[56],"can":[59],"lead":[60],"to":[61,108,170,189,210,216],"substantial":[62],"further":[63],"improvement":[64],"in":[65,115,161],"these":[66],"other":[68],"tasks.":[69],"For":[70],"integers":[71],"k":[72,182],"L":[74,184],">":[75],"k,":[76],"say":[78],"set":[81,88,134],"k-mers":[83],"is":[84,121,144],"universal":[86],"hitting":[87],"(UHS)":[89],"if":[90],"every":[91],"possible":[92],"L-long":[93],"must":[95],"contain":[96],"k-mer":[98],"from":[99],"set.":[101],"We":[102,155,175],"develop":[103],"heuristic":[105],"called":[106],"DOCKS":[107,158,201],"find":[109],"compact":[111],"UHS,":[112],"which":[113],"works":[114,159],"two":[116],"phases:":[117],"The":[118,140,218],"first":[119],"phase":[120],"solved":[122],"optimally,":[123],"second":[127],"several":[130],"heuristics,":[132],"trading":[133],"size":[135],"speed":[137],"memory.":[139],"use":[141],"heuristics":[143],"motivated":[145],"by":[146,186],"showing":[147],"NP-hardness":[149],"closely":[152],"related":[153],"problem.":[154],"show":[156,192],"well":[160],"practice":[162],"produces":[164],"UHSs":[165,194,222],"very":[168],"close":[169],"theoretical":[172],"lower":[173],"bound.":[174],"present":[176],"results":[177],"values":[180],"applying":[187],"them":[188],"real":[190],"genomes":[191],"indeed":[195],"improve":[196],"over":[197],"minimizers.":[198,217],"In":[199],"particular,":[200],"uses":[202],"less":[203],"than":[204],"30%":[205],"10-mers":[208],"needed":[209],"span":[211],"human":[213],"genome":[214],"compared":[215],"software":[219],"computed":[221],"freely":[224],"available":[225],"at":[226],"github.com/Shamir-Lab/DOCKS/":[227],"acgt.cs.tau.ac.il/docks/,":[229],"respectively.":[230]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":6},{"year":2024,"cited_by_count":8},{"year":2023,"cited_by_count":7},{"year":2022,"cited_by_count":8},{"year":2021,"cited_by_count":12},{"year":2020,"cited_by_count":13},{"year":2019,"cited_by_count":3},{"year":2018,"cited_by_count":4}],"updated_date":"2026-04-13T07:58:08.660418","created_date":"2025-10-10T00:00:00"}
