{"id":"https://openalex.org/W3046364546","doi":"https://doi.org/10.1145/3400903.3400907","title":"Efficient Search over Genomic Short Read Data","display_name":"Efficient Search over Genomic Short Read Data","publication_year":2020,"publication_date":"2020-07-07","ids":{"openalex":"https://openalex.org/W3046364546","doi":"https://doi.org/10.1145/3400903.3400907","mag":"3046364546"},"language":"en","primary_location":{"id":"doi:10.1145/3400903.3400907","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3400903.3400907","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"32nd International Conference on Scientific and Statistical Database Management","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5063425055","display_name":"Wangda Zhang","orcid":"https://orcid.org/0000-0002-4965-8132"},"institutions":[{"id":"https://openalex.org/I78577930","display_name":"Columbia University","ror":"https://ror.org/00hj8s172","country_code":"US","type":"education","lineage":["https://openalex.org/I78577930"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Wangda Zhang","raw_affiliation_strings":["Columbia University"],"affiliations":[{"raw_affiliation_string":"Columbia University","institution_ids":["https://openalex.org/I78577930"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5108711988","display_name":"Mengdi Lin","orcid":null},"institutions":[{"id":"https://openalex.org/I78577930","display_name":"Columbia University","ror":"https://ror.org/00hj8s172","country_code":"US","type":"education","lineage":["https://openalex.org/I78577930"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Mengdi Lin","raw_affiliation_strings":["Columbia University"],"affiliations":[{"raw_affiliation_string":"Columbia University","institution_ids":["https://openalex.org/I78577930"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5021663937","display_name":"Kenneth A. Ross","orcid":"https://orcid.org/0000-0001-9397-6990"},"institutions":[{"id":"https://openalex.org/I78577930","display_name":"Columbia University","ror":"https://ror.org/00hj8s172","country_code":"US","type":"education","lineage":["https://openalex.org/I78577930"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Kenneth A. Ross","raw_affiliation_strings":["Columbia University"],"affiliations":[{"raw_affiliation_string":"Columbia University","institution_ids":["https://openalex.org/I78577930"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5063425055"],"corresponding_institution_ids":["https://openalex.org/I78577930"],"apc_list":null,"apc_paid":null,"fwci":0.1326,"has_fulltext":false,"cited_by_count":1,"citation_normalized_percentile":{"value":0.53714792,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":94},"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"12"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11269","display_name":"Algorithms and Data Compression","score":0.9994000196456909,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11269","display_name":"Algorithms and Data Compression","score":0.9994000196456909,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10015","display_name":"Genomics and Phylogenetic Studies","score":0.9803000092506409,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},{"id":"https://openalex.org/T11181","display_name":"Advanced Data Storage Technologies","score":0.968999981880188,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7963705658912659},{"id":"https://openalex.org/keywords/hash-function","display_name":"Hash function","score":0.7100232839584351},{"id":"https://openalex.org/keywords/string","display_name":"String (physics)","score":0.6153233051300049},{"id":"https://openalex.org/keywords/data-structure","display_name":"Data structure","score":0.5264973044395447},{"id":"https://openalex.org/keywords/genome","display_name":"Genome","score":0.5158902406692505},{"id":"https://openalex.org/keywords/data-compression","display_name":"Data compression","score":0.4815247654914856},{"id":"https://openalex.org/keywords/pipeline","display_name":"Pipeline (software)","score":0.47824254631996155},{"id":"https://openalex.org/keywords/hash-table","display_name":"Hash table","score":0.45192477107048035},{"id":"https://openalex.org/keywords/locality","display_name":"Locality","score":0.4337500333786011},{"id":"https://openalex.org/keywords/bloom-filter","display_name":"Bloom filter","score":0.4336591362953186},{"id":"https://openalex.org/keywords/string-searching-algorithm","display_name":"String searching algorithm","score":0.42287513613700867},{"id":"https://openalex.org/keywords/overhead","display_name":"Overhead (engineering)","score":0.4128567576408386},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.3891952931880951},{"id":"https://openalex.org/keywords/theoretical-computer-science","display_name":"Theoretical computer science","score":0.37602975964546204},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.23433133959770203},{"id":"https://openalex.org/keywords/biology","display_name":"Biology","score":0.14825648069381714},{"id":"https://openalex.org/keywords/operating-system","display_name":"Operating system","score":0.11779960989952087}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7963705658912659},{"id":"https://openalex.org/C99138194","wikidata":"https://www.wikidata.org/wiki/Q183427","display_name":"Hash function","level":2,"score":0.7100232839584351},{"id":"https://openalex.org/C157486923","wikidata":"https://www.wikidata.org/wiki/Q1376436","display_name":"String (physics)","level":2,"score":0.6153233051300049},{"id":"https://openalex.org/C162319229","wikidata":"https://www.wikidata.org/wiki/Q175263","display_name":"Data structure","level":2,"score":0.5264973044395447},{"id":"https://openalex.org/C141231307","wikidata":"https://www.wikidata.org/wiki/Q7020","display_name":"Genome","level":3,"score":0.5158902406692505},{"id":"https://openalex.org/C78548338","wikidata":"https://www.wikidata.org/wiki/Q2493","display_name":"Data compression","level":2,"score":0.4815247654914856},{"id":"https://openalex.org/C43521106","wikidata":"https://www.wikidata.org/wiki/Q2165493","display_name":"Pipeline (software)","level":2,"score":0.47824254631996155},{"id":"https://openalex.org/C67388219","wikidata":"https://www.wikidata.org/wiki/Q207440","display_name":"Hash table","level":3,"score":0.45192477107048035},{"id":"https://openalex.org/C2779808786","wikidata":"https://www.wikidata.org/wiki/Q6664603","display_name":"Locality","level":2,"score":0.4337500333786011},{"id":"https://openalex.org/C147224247","wikidata":"https://www.wikidata.org/wiki/Q885373","display_name":"Bloom filter","level":2,"score":0.4336591362953186},{"id":"https://openalex.org/C7757238","wikidata":"https://www.wikidata.org/wiki/Q374040","display_name":"String searching algorithm","level":3,"score":0.42287513613700867},{"id":"https://openalex.org/C2779960059","wikidata":"https://www.wikidata.org/wiki/Q7113681","display_name":"Overhead (engineering)","level":2,"score":0.4128567576408386},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.3891952931880951},{"id":"https://openalex.org/C80444323","wikidata":"https://www.wikidata.org/wiki/Q2878974","display_name":"Theoretical computer science","level":1,"score":0.37602975964546204},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.23433133959770203},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.14825648069381714},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.11779960989952087},{"id":"https://openalex.org/C55493867","wikidata":"https://www.wikidata.org/wiki/Q7094","display_name":"Biochemistry","level":1,"score":0.0},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C38652104","wikidata":"https://www.wikidata.org/wiki/Q3510521","display_name":"Computer security","level":1,"score":0.0},{"id":"https://openalex.org/C104317684","wikidata":"https://www.wikidata.org/wiki/Q7187","display_name":"Gene","level":2,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3400903.3400907","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3400903.3400907","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"32nd International Conference on Scientific and Statistical Database Management","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":43,"referenced_works":["https://openalex.org/W1523411787","https://openalex.org/W2006088692","https://openalex.org/W2020467128","https://openalex.org/W2087721273","https://openalex.org/W2092880969","https://openalex.org/W2093080729","https://openalex.org/W2100076391","https://openalex.org/W2105583187","https://openalex.org/W2110015277","https://openalex.org/W2111044311","https://openalex.org/W2111071896","https://openalex.org/W2111295912","https://openalex.org/W2117798581","https://openalex.org/W2127241402","https://openalex.org/W2131106408","https://openalex.org/W2142434250","https://openalex.org/W2142680014","https://openalex.org/W2144560237","https://openalex.org/W2147869723","https://openalex.org/W2149059931","https://openalex.org/W2153707226","https://openalex.org/W2158322625","https://openalex.org/W2159084616","https://openalex.org/W2159906372","https://openalex.org/W2160404300","https://openalex.org/W2161048357","https://openalex.org/W2165663045","https://openalex.org/W2166588423","https://openalex.org/W2169145121","https://openalex.org/W2170727800","https://openalex.org/W2170899819","https://openalex.org/W2194172909","https://openalex.org/W2429518132","https://openalex.org/W2466892528","https://openalex.org/W2516970777","https://openalex.org/W2538355508","https://openalex.org/W2574839832","https://openalex.org/W2626931021","https://openalex.org/W2764165465","https://openalex.org/W2950572599","https://openalex.org/W2951822379","https://openalex.org/W2962771342","https://openalex.org/W3023647491"],"related_works":["https://openalex.org/W2081869611","https://openalex.org/W2150999591","https://openalex.org/W1694672787","https://openalex.org/W2011343125","https://openalex.org/W1606491530","https://openalex.org/W1564712904","https://openalex.org/W2113436622","https://openalex.org/W2623698249","https://openalex.org/W2099972273","https://openalex.org/W2362008364"],"abstract_inverted_index":{"Modern":[0],"DNA":[1,77],"sequencing":[2,183],"technology":[3],"produces":[4],"large":[5,118],"volumes":[6],"of":[7,159],"genome":[8,25,56,99,182,193],"strings":[9,100,108],"for":[10,22,42,62],"various":[11],"biological":[12],"and":[13,37,134,149,165,176],"medical":[14],"applications.":[15,44],"To":[16,93],"mitigate":[17],"the":[18,40,153,160,169],"space":[19],"overhead":[20],"required":[21],"storing":[23],"these":[24],"data,":[26],"previous":[27],"research":[28],"has":[29],"developed":[30],"compression":[31,148],"schemes":[32],"to":[33,38,53,74,105,116,141],"save":[34],"storage":[35,164,170],"resources":[36],"improve":[39],"locality":[41,155],"bioinformatics":[43,67],"These":[45,124],"approaches,":[46],"however,":[47],"typically":[48],"need":[49],"significant":[50],"additional":[51],"processing":[52],"support":[54],"efficient":[55,87],"string":[57,88,150,194],"search,":[58],"an":[59,135],"important":[60],"step":[61],"downstream":[63],"applications":[64],"in":[65,80],"a":[66,81,157,173],"pipeline.":[68],"In":[69],"this":[70,95],"paper,":[71],"we":[72,97],"propose":[73],"store":[75],"raw":[76,199],"sequence":[78],"data":[79,147],"compressed":[82,129,198],"but":[83],"searchable":[84],"format,":[85,96],"enabling":[86],"lookups":[89,151,195],"using":[90,130],"database":[91],"indexes.":[92],"build":[94],"partition":[98],"by":[101,121],"computing":[102],"hash-based":[103],"minimizers":[104],"group":[106],"overlapping":[107],"together.":[109],"We":[110,162],"carefully":[111],"optimize":[112],"our":[113,189],"hash":[114],"function":[115],"avoid":[117],"buckets":[119,125],"caused":[120],"repetitive":[122],"sequences.":[123],"are":[126],"then":[127],"effectively":[128],"local":[131],"constructed":[132],"references,":[133],"index":[136],"is":[137],"built":[138],"upon":[139],"them":[140],"guide":[142],"exact":[143],"match":[144],"lookups.":[145],"Both":[146],"exploit":[152],"enhanced":[154],"as":[156,172],"result":[158],"partitioning.":[161],"implement":[163],"search":[166],"functions":[167],"over":[168,196],"format":[171],"multithreaded":[174],"library,":[175],"perform":[177],"extensive":[178],"experiments":[179],"on":[180],"real":[181],"data.":[184,201],"The":[185],"results":[186],"show":[187],"that":[188],"approach":[190],"efficiently":[191],"executes":[192],"highly":[197],"read":[200]},"counts_by_year":[{"year":2023,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
