{"id":"https://openalex.org/W2558621397","doi":"https://doi.org/10.1145/2975167.2975192","title":"A Fast Sketch-based Assembler for Genomes","display_name":"A Fast Sketch-based Assembler for Genomes","publication_year":2016,"publication_date":"2016-10-02","ids":{"openalex":"https://openalex.org/W2558621397","doi":"https://doi.org/10.1145/2975167.2975192","mag":"2558621397"},"language":"en","primary_location":{"id":"doi:10.1145/2975167.2975192","is_oa":true,"landing_page_url":"https://doi.org/10.1145/2975167.2975192","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/2975167.2975192","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 7th ACM International Conference on Bioinformatics, Computational Biology, and Health Informatics","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://dl.acm.org/doi/pdf/10.1145/2975167.2975192","any_repository_has_fulltext":null},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5015618891","display_name":"Priyanka Ghosh","orcid":"https://orcid.org/0000-0002-9812-0689"},"institutions":[{"id":"https://openalex.org/I72951846","display_name":"Washington State University","ror":"https://ror.org/05dk0ce17","country_code":"US","type":"education","lineage":["https://openalex.org/I72951846"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Priyanka Ghosh","raw_affiliation_strings":["School of EECS, Washington State University, Pullman, WA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"School of EECS, Washington State University, Pullman, WA","institution_ids":["https://openalex.org/I72951846"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5103010775","display_name":"Ananth Kalyanaraman","orcid":"https://orcid.org/0000-0003-3495-2264"},"institutions":[{"id":"https://openalex.org/I72951846","display_name":"Washington State University","ror":"https://ror.org/05dk0ce17","country_code":"US","type":"education","lineage":["https://openalex.org/I72951846"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Ananth Kalyanaraman","raw_affiliation_strings":["School of EECS, Washington State University, Pullman, WA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"School of EECS, Washington State University, Pullman, WA","institution_ids":["https://openalex.org/I72951846"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5015618891"],"corresponding_institution_ids":["https://openalex.org/I72951846"],"apc_list":null,"apc_paid":null,"fwci":0.5222,"has_fulltext":true,"cited_by_count":4,"citation_normalized_percentile":{"value":0.72715742,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":96},"biblio":{"volume":null,"issue":null,"first_page":"241","last_page":"250"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10015","display_name":"Genomics and Phylogenetic Studies","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},"topics":[{"id":"https://openalex.org/T10015","display_name":"Genomics and Phylogenetic Studies","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},{"id":"https://openalex.org/T11269","display_name":"Algorithms and Data Compression","score":0.9825000166893005,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10521","display_name":"RNA and protein synthesis mechanisms","score":0.9728999733924866,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/de-bruijn-graph","display_name":"De Bruijn graph","score":0.8501260280609131},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.756205141544342},{"id":"https://openalex.org/keywords/de-bruijn-sequence","display_name":"De Bruijn sequence","score":0.7368066906929016},{"id":"https://openalex.org/keywords/sequence-assembly","display_name":"Sequence assembly","score":0.586975634098053},{"id":"https://openalex.org/keywords/scalability","display_name":"Scalability","score":0.5864046812057495},{"id":"https://openalex.org/keywords/contig","display_name":"Contig","score":0.520102858543396},{"id":"https://openalex.org/keywords/memory-footprint","display_name":"Memory footprint","score":0.516555666923523},{"id":"https://openalex.org/keywords/traverse","display_name":"Traverse","score":0.49040675163269043},{"id":"https://openalex.org/keywords/graph","display_name":"Graph","score":0.467693567276001},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.45986247062683105},{"id":"https://openalex.org/keywords/theoretical-computer-science","display_name":"Theoretical computer science","score":0.45630496740341187},{"id":"https://openalex.org/keywords/data-structure","display_name":"Data structure","score":0.45608091354370117},{"id":"https://openalex.org/keywords/reference-genome","display_name":"Reference genome","score":0.41278141736984253},{"id":"https://openalex.org/keywords/genome","display_name":"Genome","score":0.34254491329193115},{"id":"https://openalex.org/keywords/programming-language","display_name":"Programming language","score":0.17514875531196594},{"id":"https://openalex.org/keywords/biology","display_name":"Biology","score":0.133310467004776},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.10558044910430908}],"concepts":[{"id":"https://openalex.org/C20218877","wikidata":"https://www.wikidata.org/wiki/Q3066095","display_name":"De Bruijn graph","level":3,"score":0.8501260280609131},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.756205141544342},{"id":"https://openalex.org/C170320093","wikidata":"https://www.wikidata.org/wiki/Q1953457","display_name":"De Bruijn sequence","level":2,"score":0.7368066906929016},{"id":"https://openalex.org/C18949551","wikidata":"https://www.wikidata.org/wiki/Q740578","display_name":"Sequence assembly","level":5,"score":0.586975634098053},{"id":"https://openalex.org/C48044578","wikidata":"https://www.wikidata.org/wiki/Q727490","display_name":"Scalability","level":2,"score":0.5864046812057495},{"id":"https://openalex.org/C59582021","wikidata":"https://www.wikidata.org/wiki/Q1128751","display_name":"Contig","level":4,"score":0.520102858543396},{"id":"https://openalex.org/C74912251","wikidata":"https://www.wikidata.org/wiki/Q6815727","display_name":"Memory footprint","level":2,"score":0.516555666923523},{"id":"https://openalex.org/C176809094","wikidata":"https://www.wikidata.org/wiki/Q15401496","display_name":"Traverse","level":2,"score":0.49040675163269043},{"id":"https://openalex.org/C132525143","wikidata":"https://www.wikidata.org/wiki/Q141488","display_name":"Graph","level":2,"score":0.467693567276001},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.45986247062683105},{"id":"https://openalex.org/C80444323","wikidata":"https://www.wikidata.org/wiki/Q2878974","display_name":"Theoretical computer science","level":1,"score":0.45630496740341187},{"id":"https://openalex.org/C162319229","wikidata":"https://www.wikidata.org/wiki/Q175263","display_name":"Data structure","level":2,"score":0.45608091354370117},{"id":"https://openalex.org/C192953774","wikidata":"https://www.wikidata.org/wiki/Q7307127","display_name":"Reference genome","level":4,"score":0.41278141736984253},{"id":"https://openalex.org/C141231307","wikidata":"https://www.wikidata.org/wiki/Q7020","display_name":"Genome","level":3,"score":0.34254491329193115},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.17514875531196594},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.133310467004776},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.10558044910430908},{"id":"https://openalex.org/C118615104","wikidata":"https://www.wikidata.org/wiki/Q121416","display_name":"Discrete mathematics","level":1,"score":0.0},{"id":"https://openalex.org/C55493867","wikidata":"https://www.wikidata.org/wiki/Q7094","display_name":"Biochemistry","level":1,"score":0.0},{"id":"https://openalex.org/C13280743","wikidata":"https://www.wikidata.org/wiki/Q131089","display_name":"Geodesy","level":1,"score":0.0},{"id":"https://openalex.org/C77088390","wikidata":"https://www.wikidata.org/wiki/Q8513","display_name":"Database","level":1,"score":0.0},{"id":"https://openalex.org/C104317684","wikidata":"https://www.wikidata.org/wiki/Q7187","display_name":"Gene","level":2,"score":0.0},{"id":"https://openalex.org/C162317418","wikidata":"https://www.wikidata.org/wiki/Q252857","display_name":"Transcriptome","level":4,"score":0.0},{"id":"https://openalex.org/C150194340","wikidata":"https://www.wikidata.org/wiki/Q26972","display_name":"Gene expression","level":3,"score":0.0},{"id":"https://openalex.org/C205649164","wikidata":"https://www.wikidata.org/wiki/Q1071","display_name":"Geography","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/2975167.2975192","is_oa":true,"landing_page_url":"https://doi.org/10.1145/2975167.2975192","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/2975167.2975192","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 7th ACM International Conference on Bioinformatics, Computational Biology, and Health Informatics","raw_type":"proceedings-article"}],"best_oa_location":{"id":"doi:10.1145/2975167.2975192","is_oa":true,"landing_page_url":"https://doi.org/10.1145/2975167.2975192","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/2975167.2975192","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 7th ACM International Conference on Bioinformatics, Computational Biology, and Health Informatics","raw_type":"proceedings-article"},"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G1286236842","display_name":null,"funder_award_id":"-AC02-05CH11231","funder_id":"https://openalex.org/F4320306084","funder_display_name":"U.S. Department of Energy"},{"id":"https://openalex.org/G1677143136","display_name":null,"funder_award_id":"05CH11231","funder_id":"https://openalex.org/F4320317220","funder_display_name":"National Energy Research Scientific Computing Center"},{"id":"https://openalex.org/G2503023272","display_name":null,"funder_award_id":"Contract No. DE-AC02-05CH11231","funder_id":"https://openalex.org/F4320306084","funder_display_name":"U.S. Department of Energy"},{"id":"https://openalex.org/G3083819904","display_name":null,"funder_award_id":"05CH11231","funder_id":"https://openalex.org/F4320306084","funder_display_name":"U.S. Department of Energy"},{"id":"https://openalex.org/G4501827968","display_name":null,"funder_award_id":"AC02-05CH11231","funder_id":"https://openalex.org/F4320332359","funder_display_name":"Office of Science"},{"id":"https://openalex.org/G4565140552","display_name":null,"funder_award_id":"-AC02-05CH11231","funder_id":"https://openalex.org/F4320332359","funder_display_name":"Office of Science"},{"id":"https://openalex.org/G498139845","display_name":null,"funder_award_id":"DE-AC02","funder_id":"https://openalex.org/F4320332359","funder_display_name":"Office of Science"},{"id":"https://openalex.org/G5076365615","display_name":null,"funder_award_id":"AC02-05CH11231","funder_id":"https://openalex.org/F4320317220","funder_display_name":"National Energy Research Scientific Computing Center"},{"id":"https://openalex.org/G5296923526","display_name":null,"funder_award_id":"Contract No. DE-AC02-05CH11231","funder_id":"https://openalex.org/F4320317220","funder_display_name":"National Energy Research Scientific Computing Center"},{"id":"https://openalex.org/G6348972864","display_name":null,"funder_award_id":"AC02-05CH11231","funder_id":"https://openalex.org/F4320306084","funder_display_name":"U.S. Department of Energy"},{"id":"https://openalex.org/G6558272803","display_name":null,"funder_award_id":"DE-AC02","funder_id":"https://openalex.org/F4320306084","funder_display_name":"U.S. Department of Energy"},{"id":"https://openalex.org/G7368046788","display_name":null,"funder_award_id":"DE-AC02-05CH11231","funder_id":"https://openalex.org/F4320306084","funder_display_name":"U.S. Department of Energy"},{"id":"https://openalex.org/G7672463193","display_name":null,"funder_award_id":"No. DE-AC02-05CH11231","funder_id":"https://openalex.org/F4320332359","funder_display_name":"Office of Science"},{"id":"https://openalex.org/G805243471","display_name":null,"funder_award_id":"Contract No. DE-AC02-05CH11231","funder_id":"https://openalex.org/F4320332359","funder_display_name":"Office of Science"},{"id":"https://openalex.org/G833114420","display_name":null,"funder_award_id":"DE-SC-0006516","funder_id":"https://openalex.org/F4320306084","funder_display_name":"U.S. Department of Energy"},{"id":"https://openalex.org/G969889393","display_name":null,"funder_award_id":"DE-AC02-","funder_id":"https://openalex.org/F4320306084","funder_display_name":"U.S. Department of Energy"}],"funders":[{"id":"https://openalex.org/F4320306084","display_name":"U.S. Department of Energy","ror":"https://ror.org/01bj3aw27"},{"id":"https://openalex.org/F4320317220","display_name":"National Energy Research Scientific Computing Center","ror":"https://ror.org/05v3mvq14"},{"id":"https://openalex.org/F4320332359","display_name":"Office of Science","ror":"https://ror.org/00mmn6b08"}],"has_content":{"pdf":true,"grobid_xml":true},"content_urls":{"pdf":"https://content.openalex.org/works/W2558621397.pdf","grobid_xml":"https://content.openalex.org/works/W2558621397.grobid-xml"},"referenced_works_count":18,"referenced_works":["https://openalex.org/W100509257","https://openalex.org/W1966822396","https://openalex.org/W1980606681","https://openalex.org/W1989437487","https://openalex.org/W2080234606","https://openalex.org/W2105067980","https://openalex.org/W2105656684","https://openalex.org/W2107772251","https://openalex.org/W2133531097","https://openalex.org/W2136651963","https://openalex.org/W2142749416","https://openalex.org/W2151017710","https://openalex.org/W2157539385","https://openalex.org/W2160969485","https://openalex.org/W2161048357","https://openalex.org/W2161546116","https://openalex.org/W2198888083","https://openalex.org/W3198160809"],"related_works":["https://openalex.org/W2562683361","https://openalex.org/W3087469195","https://openalex.org/W2354744388","https://openalex.org/W3125399386","https://openalex.org/W2949121831","https://openalex.org/W2991626973","https://openalex.org/W4387083702","https://openalex.org/W4318940746","https://openalex.org/W2006192792","https://openalex.org/W2133531097"],"abstract_inverted_index":{"De":[0],"novo":[1],"genome":[2,11,39],"assembly":[3,40,238],"describes":[4],"the":[5,23,45,56,66,79,85,104,121,143,148,195,236],"process":[6],"of":[7,16,28,36,44,60,74,87,106,110,120,142,150,186],"reconstructing":[8],"an":[9,139,152,172],"unknown":[10],"from":[12,22],"a":[13,61,72,130,161,183,246],"large":[14],"collection":[15],"short":[17,52,75],"(or":[18],"long)":[19],"reads":[20],"sequenced":[21],"genome.":[24],"A":[25],"single":[26],"run":[27],"Next-Generation":[29],"Sequencing":[30],"(NGS)":[31],"technologies":[32],"can":[33],"produce":[34,271],"billions":[35],"reads,":[37,89],"making":[38],"computationally":[41],"demanding.":[42],"One":[43],"major":[46],"computational":[47],"steps":[48,109],"in":[49,97,285],"modern":[50],"day":[51],"read":[53,76],"assemblers":[54,77],"involves":[55],"construction":[57,112],"and":[58,90,93,113,123,229,260,289],"use":[59],"string":[62],"data":[63,163,167],"structure":[64,164],"called":[65],"de":[67,81,144,174],"Bruijn":[68,82,145,175],"graph.":[69],"In":[70,125,199],"fact,":[71],"majority":[73],"build":[78,138],"complete":[80],"graph":[83,111,146,176],"for":[84,147,165],"set":[86],"input":[88],"subsequently":[91],"traverse":[92],"prune":[94],"low-quality":[95],"edges,":[96],"order":[98],"to":[99,116,137,182,192,194,210,222,270],"generate":[100],"genomic":[101],"\"contigs\"":[102],"---":[103],"output":[105,237],"assembly.":[107,153],"These":[108],"traversal,":[114],"contribute":[115,193,209],"well":[117],"over":[118],"90%":[119],"runtime":[122],"memory.":[124],"this":[126],"paper,":[127],"we":[128,243],"present":[129],"fast":[131],"algorithm,":[132],"FastEtch,":[133],"that":[134,177,188,206,265],"uses":[135,156],"sketching":[136],"approximate":[140,173,218],"version":[141],"purpose":[149],"generating":[151],"The":[154,169],"algorithm":[155,254],"Count-Min":[157],"sketch,":[158],"which":[159,208],"is":[160,171,220,268],"probabilistic":[162],"streaming":[166],"sets.":[168],"result":[170],"stores":[178],"information":[179],"pertaining":[180],"only":[181],"selected":[184],"subset":[185],"nodes":[187],"are":[189,202,214],"most":[190,279],"likely":[191],"contig":[196,212],"generation":[197,213],"step.":[198],"addition,":[200],"edges":[201],"not":[203],"stored;":[204],"instead":[205],"fraction":[207],"our":[211,253,266],"detected":[215],"on-the-fly.":[216],"This":[217],"approach":[219],"intended":[221],"significantly":[223,286],"improve":[224],"performance":[225],"(both":[226],"execution":[227],"time":[228,290],"memory":[230,288],"footprint)":[231],"whilst":[232],"possibly":[233],"compromising":[234],"on":[235,256],"quality.":[239],"For":[240],"further":[241],"scalability,":[242],"have":[244],"implemented":[245],"multi-threaded":[247],"parallel":[248],"code.":[249],"Experimental":[250],"results":[251],"using":[252],"conducted":[255],"E.":[257],"coli,":[258],"Yeast,":[259],"C.":[261],"elegans":[262],"genomes":[263],"show":[264],"method":[267],"able":[269],"assemblies":[272],"with":[273],"quality":[274],"comparable":[275],"or":[276],"better":[277],"than":[278],"other":[280],"state-of-the-art":[281],"assemblers,":[282],"while":[283],"running":[284],"reduced":[287],"footprint.":[291]},"counts_by_year":[{"year":2020,"cited_by_count":1},{"year":2019,"cited_by_count":2},{"year":2017,"cited_by_count":1}],"updated_date":"2026-05-09T13:55:54.758798","created_date":"2025-10-10T00:00:00"}
