{"id":"https://openalex.org/W2557391416","doi":"https://doi.org/10.1145/2975167.2975211","title":"Kmerind","display_name":"Kmerind","publication_year":2016,"publication_date":"2016-10-02","ids":{"openalex":"https://openalex.org/W2557391416","doi":"https://doi.org/10.1145/2975167.2975211","mag":"2557391416"},"language":"en","primary_location":{"id":"doi:10.1145/2975167.2975211","is_oa":true,"landing_page_url":"https://doi.org/10.1145/2975167.2975211","pdf_url":"http://dl.acm.org/ft_gateway.cfm?id=2975211&type=pdf","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 7th ACM International Conference on Bioinformatics, Computational Biology, and Health Informatics","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"http://dl.acm.org/ft_gateway.cfm?id=2975211&type=pdf","any_repository_has_fulltext":null},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5105460573","display_name":"Tony Pan","orcid":"https://orcid.org/0000-0001-7945-6534"},"institutions":[{"id":"https://openalex.org/I130701444","display_name":"Georgia Institute of Technology","ror":"https://ror.org/01zkghx44","country_code":"US","type":"education","lineage":["https://openalex.org/I130701444"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Tony Pan","raw_affiliation_strings":["School of Computational Science and Engineering, Georgia Institute of Technology, Atlanta, Georgia, USA"],"affiliations":[{"raw_affiliation_string":"School of Computational Science and Engineering, Georgia Institute of Technology, Atlanta, Georgia, USA","institution_ids":["https://openalex.org/I130701444"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5023547296","display_name":"Patrick Flick","orcid":"https://orcid.org/0000-0003-3343-2398"},"institutions":[{"id":"https://openalex.org/I130701444","display_name":"Georgia Institute of Technology","ror":"https://ror.org/01zkghx44","country_code":"US","type":"education","lineage":["https://openalex.org/I130701444"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Patrick Flick","raw_affiliation_strings":["School of Computational Science and Engineering, Georgia Institute of Technology, Atlanta, Georgia, USA"],"affiliations":[{"raw_affiliation_string":"School of Computational Science and Engineering, Georgia Institute of Technology, Atlanta, Georgia, USA","institution_ids":["https://openalex.org/I130701444"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5088169253","display_name":"Jain Chirag","orcid":null},"institutions":[{"id":"https://openalex.org/I130701444","display_name":"Georgia Institute of Technology","ror":"https://ror.org/01zkghx44","country_code":"US","type":"education","lineage":["https://openalex.org/I130701444"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Chirag Jain","raw_affiliation_strings":["School of Computational Science and Engineering, Georgia Institute of Technology, Atlanta, Georgia, USA"],"affiliations":[{"raw_affiliation_string":"School of Computational Science and Engineering, Georgia Institute of Technology, Atlanta, Georgia, USA","institution_ids":["https://openalex.org/I130701444"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100617992","display_name":"Yongchao Liu","orcid":"https://orcid.org/0000-0003-3440-9675"},"institutions":[{"id":"https://openalex.org/I130701444","display_name":"Georgia Institute of Technology","ror":"https://ror.org/01zkghx44","country_code":"US","type":"education","lineage":["https://openalex.org/I130701444"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Yongchao Liu","raw_affiliation_strings":["School of Computational Science and Engineering, Georgia Institute of Technology, Atlanta, Georgia, USA"],"affiliations":[{"raw_affiliation_string":"School of Computational Science and Engineering, Georgia Institute of Technology, Atlanta, Georgia, USA","institution_ids":["https://openalex.org/I130701444"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5074347913","display_name":"Srinivas Aluru","orcid":"https://orcid.org/0000-0003-4279-469X"},"institutions":[{"id":"https://openalex.org/I130701444","display_name":"Georgia Institute of Technology","ror":"https://ror.org/01zkghx44","country_code":"US","type":"education","lineage":["https://openalex.org/I130701444"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Srinivas Aluru","raw_affiliation_strings":["School of Computational Science and Engineering, Georgia Institute of Technology, Atlanta, Georgia, USA"],"affiliations":[{"raw_affiliation_string":"School of Computational Science and Engineering, Georgia Institute of Technology, Atlanta, Georgia, USA","institution_ids":["https://openalex.org/I130701444"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5105460573"],"corresponding_institution_ids":["https://openalex.org/I130701444"],"apc_list":null,"apc_paid":null,"fwci":1.5688,"has_fulltext":true,"cited_by_count":13,"citation_normalized_percentile":{"value":0.84109969,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":98},"biblio":{"volume":null,"issue":null,"first_page":"422","last_page":"433"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10015","display_name":"Genomics and Phylogenetic Studies","score":0.9995999932289124,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},"topics":[{"id":"https://openalex.org/T10015","display_name":"Genomics and Phylogenetic Studies","score":0.9995999932289124,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},{"id":"https://openalex.org/T11269","display_name":"Algorithms and Data Compression","score":0.9980999827384949,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10521","display_name":"RNA and protein synthesis mechanisms","score":0.9878000020980835,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8485783934593201},{"id":"https://openalex.org/keywords/substring","display_name":"Substring","score":0.7029372453689575},{"id":"https://openalex.org/keywords/k-mer","display_name":"k-mer","score":0.6797529458999634},{"id":"https://openalex.org/keywords/search-engine-indexing","display_name":"Search engine indexing","score":0.670220136642456},{"id":"https://openalex.org/keywords/set","display_name":"Set (abstract data type)","score":0.46327778697013855},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.45127999782562256},{"id":"https://openalex.org/keywords/data-structure","display_name":"Data structure","score":0.373969167470932},{"id":"https://openalex.org/keywords/operating-system","display_name":"Operating system","score":0.30125364661216736},{"id":"https://openalex.org/keywords/genome","display_name":"Genome","score":0.2714828848838806},{"id":"https://openalex.org/keywords/programming-language","display_name":"Programming language","score":0.21065863966941833},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.18651896715164185}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8485783934593201},{"id":"https://openalex.org/C182407805","wikidata":"https://www.wikidata.org/wiki/Q2626534","display_name":"Substring","level":3,"score":0.7029372453689575},{"id":"https://openalex.org/C2279292","wikidata":"https://www.wikidata.org/wiki/Q6322851","display_name":"k-mer","level":4,"score":0.6797529458999634},{"id":"https://openalex.org/C75165309","wikidata":"https://www.wikidata.org/wiki/Q2258979","display_name":"Search engine indexing","level":2,"score":0.670220136642456},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.46327778697013855},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.45127999782562256},{"id":"https://openalex.org/C162319229","wikidata":"https://www.wikidata.org/wiki/Q175263","display_name":"Data structure","level":2,"score":0.373969167470932},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.30125364661216736},{"id":"https://openalex.org/C141231307","wikidata":"https://www.wikidata.org/wiki/Q7020","display_name":"Genome","level":3,"score":0.2714828848838806},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.21065863966941833},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.18651896715164185},{"id":"https://openalex.org/C55493867","wikidata":"https://www.wikidata.org/wiki/Q7094","display_name":"Biochemistry","level":1,"score":0.0},{"id":"https://openalex.org/C104317684","wikidata":"https://www.wikidata.org/wiki/Q7187","display_name":"Gene","level":2,"score":0.0},{"id":"https://openalex.org/C185592680","wikidata":"https://www.wikidata.org/wiki/Q2329","display_name":"Chemistry","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/2975167.2975211","is_oa":true,"landing_page_url":"https://doi.org/10.1145/2975167.2975211","pdf_url":"http://dl.acm.org/ft_gateway.cfm?id=2975211&type=pdf","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 7th ACM International Conference on Bioinformatics, Computational Biology, and Health Informatics","raw_type":"proceedings-article"}],"best_oa_location":{"id":"doi:10.1145/2975167.2975211","is_oa":true,"landing_page_url":"https://doi.org/10.1145/2975167.2975211","pdf_url":"http://dl.acm.org/ft_gateway.cfm?id=2975211&type=pdf","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 7th ACM International Conference on Bioinformatics, Computational Biology, and Health Informatics","raw_type":"proceedings-article"},"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G1683691965","display_name":null,"funder_award_id":"1229081","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"},{"id":"https://openalex.org/G1795292305","display_name":null,"funder_award_id":"CNS-1229081","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"},{"id":"https://openalex.org/G2766341901","display_name":"BIGDATA: Mid-Scale: DA: Collaborative Research: Genomes Galore - Core Techniques, Libraries, and Domain Specific Languages for High-Throughput DNA Sequencing","funder_award_id":"1416259","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"},{"id":"https://openalex.org/G7469405548","display_name":null,"funder_award_id":"IIS-1416259","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"},{"id":"https://openalex.org/G795231567","display_name":null,"funder_award_id":"IIS-1416259, CNS-1229081","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"},{"id":"https://openalex.org/G848032724","display_name":null,"funder_award_id":"Science","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"}],"funders":[{"id":"https://openalex.org/F4320306076","display_name":"National Science Foundation","ror":"https://ror.org/021nxhr62"},{"id":"https://openalex.org/F4320307102","display_name":"Intel Corporation","ror":"https://ror.org/01ek73717"}],"has_content":{"pdf":true,"grobid_xml":true},"content_urls":{"pdf":"https://content.openalex.org/works/W2557391416.pdf","grobid_xml":"https://content.openalex.org/works/W2557391416.grobid-xml"},"referenced_works_count":27,"referenced_works":["https://openalex.org/W80321102","https://openalex.org/W1547830536","https://openalex.org/W1966822396","https://openalex.org/W1993399552","https://openalex.org/W2005202728","https://openalex.org/W2014099509","https://openalex.org/W2037444377","https://openalex.org/W2055043387","https://openalex.org/W2057253402","https://openalex.org/W2080234606","https://openalex.org/W2096128575","https://openalex.org/W2097066660","https://openalex.org/W2111307596","https://openalex.org/W2115546424","https://openalex.org/W2119404103","https://openalex.org/W2119745866","https://openalex.org/W2123845384","https://openalex.org/W2125266506","https://openalex.org/W2133956160","https://openalex.org/W2134283755","https://openalex.org/W2136145671","https://openalex.org/W2144662851","https://openalex.org/W2148288544","https://openalex.org/W2158322625","https://openalex.org/W2160969485","https://openalex.org/W2163584430","https://openalex.org/W2171003081"],"related_works":["https://openalex.org/W2411730464","https://openalex.org/W2922326376","https://openalex.org/W2188499136","https://openalex.org/W3095240633","https://openalex.org/W75168880","https://openalex.org/W4388228158","https://openalex.org/W2950956043","https://openalex.org/W2607704574","https://openalex.org/W2964338186","https://openalex.org/W2775498959"],"abstract_inverted_index":{"Counting":[0],"and":[1,22,26,41,44,51,92,98,107,122,180,204,222,227,234],"indexing":[2,78,216,233],"fixed":[3],"length":[4],"substrings,":[5],"or":[6,127,141],"k-mers,":[7],"in":[8,15,31,162,170,185,195,201],"biological":[9],"sequences":[10],"is":[11,212,237],"a":[12,74,88,111,155,163],"key":[13],"step":[14],"many":[16],"bioinformatics":[17,49],"tasks":[18],"including":[19],"genome":[20,24],"alignment":[21],"mapping,":[23],"assembly,":[25],"error":[27],"correction.":[28],"While":[29],"advances":[30],"next":[32],"generation":[33,63],"sequencing":[34],"technologies":[35],"have":[36],"dramatically":[37],"reduced":[38],"the":[39,57,61,193,213,223],"cost":[40],"improved":[42],"latency":[43],"throughput,":[45],"there":[46],"exist":[47],"few":[48],"tools":[50,148],"libraries":[52],"that":[53,101],"can":[54,113,198],"efficiently":[55],"process":[56],"data":[58,168],"sets":[59],"at":[60],"current":[62],"rate":[64],"of":[65,90,192],"1.8":[66],"terabases":[67],"every":[68],"3":[69],"days.":[70],"We":[71],"present":[72],"Kmerind,":[73,110],"high":[75,132],"performance":[76,133],"k-mer":[77,120,137,146,215,232],"library":[79,86,217,229],"for":[80,190,218,230],"distributed":[81,156,219],"memory":[82,152,157,220],"environments.":[83],"The":[84],"Kmerind":[85,159,211,236],"provides":[87],"set":[89,169],"simple":[91],"consistent":[93],"APIs":[94],"with":[95],"sequential":[96],"semantics":[97],"parallel":[99],"implementations":[100],"are":[102],"designed":[103],"to":[104],"be":[105,199],"flexible":[106],"extensible.":[108],"Using":[109],"user":[112],"easily":[114],"instantiate":[115],"application-specific":[116],"indices,":[117],"such":[118],"as":[119],"counter":[121,138],"position":[123],"index,":[124],"from":[125,239],"biult-in":[126],"user-supplied":[128],"components":[129],"without":[130],"extensive":[131],"computing":[134],"expertise.":[135],"Kmerind's":[136],"performs":[139],"similarly":[140],"better":[142],"than":[143,172],"existing,":[144],"best-in-class":[145],"counting":[147],"even":[149],"on":[150,175],"shared":[151],"systems.":[153],"In":[154],"environment,":[158],"counts":[160],"k-mers":[161,194],"120":[164],"GB":[165],"sequence":[166],"read":[167],"less":[171],"13":[173],"seconds":[174,203],"1024":[176],"Xeon":[177],"CPU":[178],"cores,":[179],"fully":[181,225],"indexes":[182],"their":[183],"positions":[184],"approximately":[186],"17":[187],"seconds.":[188],"Querying":[189],"1%":[191],"these":[196],"indices":[197],"completed":[200],"0.23":[202],"28":[205],"seconds,":[206],"respectively.":[207],"To":[208],"our":[209],"knowledge,":[210],"first":[214,224],"environments,":[221],"customizable":[226],"extensible":[228],"general":[231],"counting.":[235],"available":[238],"https://github.com/ParBLiSS/kmerind.":[240]},"counts_by_year":[{"year":2025,"cited_by_count":1},{"year":2023,"cited_by_count":1},{"year":2022,"cited_by_count":1},{"year":2020,"cited_by_count":1},{"year":2019,"cited_by_count":2},{"year":2018,"cited_by_count":3},{"year":2017,"cited_by_count":4}],"updated_date":"2026-04-10T15:06:20.359241","created_date":"2016-12-08T00:00:00"}
