{"id":"https://openalex.org/W3184104403","doi":"https://doi.org/10.1145/3459930.3469520","title":"Improving the efficiency of de Bruijn graph construction using compact universal hitting sets","display_name":"Improving the efficiency of de Bruijn graph construction using compact universal hitting sets","publication_year":2021,"publication_date":"2021-07-30","ids":{"openalex":"https://openalex.org/W3184104403","doi":"https://doi.org/10.1145/3459930.3469520","mag":"3184104403"},"language":"en","primary_location":{"id":"doi:10.1145/3459930.3469520","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3459930.3469520","pdf_url":null,"source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 12th ACM International Conference on Bioinformatics, Computational Biology, and Health Informatics","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://doi.org/10.1145/3459930.3469520","any_repository_has_fulltext":null},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5006655105","display_name":"Yael Ben-Ari","orcid":null},"institutions":[{"id":"https://openalex.org/I16391192","display_name":"Tel Aviv University","ror":"https://ror.org/04mhzgx49","country_code":"IL","type":"education","lineage":["https://openalex.org/I16391192"]}],"countries":["IL"],"is_corresponding":true,"raw_author_name":"Yael Ben-Ari","raw_affiliation_strings":["Tel-Aviv University, Tel-Aviv, Israel"],"affiliations":[{"raw_affiliation_string":"Tel-Aviv University, Tel-Aviv, Israel","institution_ids":["https://openalex.org/I16391192"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5054635515","display_name":"Dan Flomin","orcid":"https://orcid.org/0000-0002-1911-7298"},"institutions":[{"id":"https://openalex.org/I16391192","display_name":"Tel Aviv University","ror":"https://ror.org/04mhzgx49","country_code":"IL","type":"education","lineage":["https://openalex.org/I16391192"]}],"countries":["IL"],"is_corresponding":false,"raw_author_name":"Dan Flomin","raw_affiliation_strings":["Tel-Aviv University, Tel-Aviv, Israel"],"affiliations":[{"raw_affiliation_string":"Tel-Aviv University, Tel-Aviv, Israel","institution_ids":["https://openalex.org/I16391192"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5039129661","display_name":"Lianrong Pu","orcid":"https://orcid.org/0009-0003-3669-8156"},"institutions":[{"id":"https://openalex.org/I16391192","display_name":"Tel Aviv University","ror":"https://ror.org/04mhzgx49","country_code":"IL","type":"education","lineage":["https://openalex.org/I16391192"]}],"countries":["IL"],"is_corresponding":false,"raw_author_name":"Lianrong Pu","raw_affiliation_strings":["Tel-Aviv University, Tel-Aviv, Israel"],"affiliations":[{"raw_affiliation_string":"Tel-Aviv University, Tel-Aviv, Israel","institution_ids":["https://openalex.org/I16391192"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5037786810","display_name":"Yaron Orenstein","orcid":"https://orcid.org/0000-0002-3583-3112"},"institutions":[{"id":"https://openalex.org/I124227911","display_name":"Ben-Gurion University of the Negev","ror":"https://ror.org/05tkyf982","country_code":"IL","type":"education","lineage":["https://openalex.org/I124227911"]}],"countries":["IL"],"is_corresponding":false,"raw_author_name":"Yaron Orenstein","raw_affiliation_strings":["Engineering Ben-Gurion University of the Negev, Beer-Sheva, Israel"],"affiliations":[{"raw_affiliation_string":"Engineering Ben-Gurion University of the Negev, Beer-Sheva, Israel","institution_ids":["https://openalex.org/I124227911"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5038806020","display_name":"Ron Shamir","orcid":"https://orcid.org/0000-0003-1889-9870"},"institutions":[{"id":"https://openalex.org/I16391192","display_name":"Tel Aviv University","ror":"https://ror.org/04mhzgx49","country_code":"IL","type":"education","lineage":["https://openalex.org/I16391192"]}],"countries":["IL"],"is_corresponding":false,"raw_author_name":"Ron Shamir","raw_affiliation_strings":["Tel-Aviv University, Tel-Aviv, Israel"],"affiliations":[{"raw_affiliation_string":"Tel-Aviv University, Tel-Aviv, Israel","institution_ids":["https://openalex.org/I16391192"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5006655105"],"corresponding_institution_ids":["https://openalex.org/I16391192"],"apc_list":null,"apc_paid":null,"fwci":0.3218,"has_fulltext":false,"cited_by_count":4,"citation_normalized_percentile":{"value":0.5570579,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":96},"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"9"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10015","display_name":"Genomics and Phylogenetic Studies","score":0.9993000030517578,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},"topics":[{"id":"https://openalex.org/T10015","display_name":"Genomics and Phylogenetic Studies","score":0.9993000030517578,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},{"id":"https://openalex.org/T11269","display_name":"Algorithms and Data Compression","score":0.9986000061035156,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10521","display_name":"RNA and protein synthesis mechanisms","score":0.9965999722480774,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/de-bruijn-graph","display_name":"De Bruijn graph","score":0.8308143615722656},{"id":"https://openalex.org/keywords/substring","display_name":"Substring","score":0.678370475769043},{"id":"https://openalex.org/keywords/bottleneck","display_name":"Bottleneck","score":0.6671703457832336},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6505914926528931},{"id":"https://openalex.org/keywords/de-bruijn-sequence","display_name":"De Bruijn sequence","score":0.6297398805618286},{"id":"https://openalex.org/keywords/k-mer","display_name":"k-mer","score":0.573695182800293},{"id":"https://openalex.org/keywords/sequence-assembly","display_name":"Sequence assembly","score":0.5284514427185059},{"id":"https://openalex.org/keywords/dna-sequencing","display_name":"DNA sequencing","score":0.4868687689304352},{"id":"https://openalex.org/keywords/reference-genome","display_name":"Reference genome","score":0.47077223658561707},{"id":"https://openalex.org/keywords/graph","display_name":"Graph","score":0.4337444305419922},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.41658106446266174},{"id":"https://openalex.org/keywords/theoretical-computer-science","display_name":"Theoretical computer science","score":0.39549142122268677},{"id":"https://openalex.org/keywords/data-structure","display_name":"Data structure","score":0.3284137547016144},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.3239467144012451},{"id":"https://openalex.org/keywords/biology","display_name":"Biology","score":0.25128471851348877},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.23867857456207275},{"id":"https://openalex.org/keywords/combinatorics","display_name":"Combinatorics","score":0.17341655492782593},{"id":"https://openalex.org/keywords/dna","display_name":"DNA","score":0.16158491373062134},{"id":"https://openalex.org/keywords/genetics","display_name":"Genetics","score":0.1473884880542755},{"id":"https://openalex.org/keywords/gene","display_name":"Gene","score":0.09811767935752869}],"concepts":[{"id":"https://openalex.org/C20218877","wikidata":"https://www.wikidata.org/wiki/Q3066095","display_name":"De Bruijn graph","level":3,"score":0.8308143615722656},{"id":"https://openalex.org/C182407805","wikidata":"https://www.wikidata.org/wiki/Q2626534","display_name":"Substring","level":3,"score":0.678370475769043},{"id":"https://openalex.org/C2780513914","wikidata":"https://www.wikidata.org/wiki/Q18210350","display_name":"Bottleneck","level":2,"score":0.6671703457832336},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6505914926528931},{"id":"https://openalex.org/C170320093","wikidata":"https://www.wikidata.org/wiki/Q1953457","display_name":"De Bruijn sequence","level":2,"score":0.6297398805618286},{"id":"https://openalex.org/C2279292","wikidata":"https://www.wikidata.org/wiki/Q6322851","display_name":"k-mer","level":4,"score":0.573695182800293},{"id":"https://openalex.org/C18949551","wikidata":"https://www.wikidata.org/wiki/Q740578","display_name":"Sequence assembly","level":5,"score":0.5284514427185059},{"id":"https://openalex.org/C51679486","wikidata":"https://www.wikidata.org/wiki/Q380546","display_name":"DNA sequencing","level":3,"score":0.4868687689304352},{"id":"https://openalex.org/C192953774","wikidata":"https://www.wikidata.org/wiki/Q7307127","display_name":"Reference genome","level":4,"score":0.47077223658561707},{"id":"https://openalex.org/C132525143","wikidata":"https://www.wikidata.org/wiki/Q141488","display_name":"Graph","level":2,"score":0.4337444305419922},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.41658106446266174},{"id":"https://openalex.org/C80444323","wikidata":"https://www.wikidata.org/wiki/Q2878974","display_name":"Theoretical computer science","level":1,"score":0.39549142122268677},{"id":"https://openalex.org/C162319229","wikidata":"https://www.wikidata.org/wiki/Q175263","display_name":"Data structure","level":2,"score":0.3284137547016144},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.3239467144012451},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.25128471851348877},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.23867857456207275},{"id":"https://openalex.org/C114614502","wikidata":"https://www.wikidata.org/wiki/Q76592","display_name":"Combinatorics","level":1,"score":0.17341655492782593},{"id":"https://openalex.org/C552990157","wikidata":"https://www.wikidata.org/wiki/Q7430","display_name":"DNA","level":2,"score":0.16158491373062134},{"id":"https://openalex.org/C54355233","wikidata":"https://www.wikidata.org/wiki/Q7162","display_name":"Genetics","level":1,"score":0.1473884880542755},{"id":"https://openalex.org/C104317684","wikidata":"https://www.wikidata.org/wiki/Q7187","display_name":"Gene","level":2,"score":0.09811767935752869},{"id":"https://openalex.org/C150194340","wikidata":"https://www.wikidata.org/wiki/Q26972","display_name":"Gene expression","level":3,"score":0.0},{"id":"https://openalex.org/C149635348","wikidata":"https://www.wikidata.org/wiki/Q193040","display_name":"Embedded system","level":1,"score":0.0},{"id":"https://openalex.org/C162317418","wikidata":"https://www.wikidata.org/wiki/Q252857","display_name":"Transcriptome","level":4,"score":0.0},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3459930.3469520","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3459930.3469520","pdf_url":null,"source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 12th ACM International Conference on Bioinformatics, Computational Biology, and Health Informatics","raw_type":"proceedings-article"}],"best_oa_location":{"id":"doi:10.1145/3459930.3469520","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3459930.3469520","pdf_url":null,"source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 12th ACM International Conference on Bioinformatics, Computational Biology, and Health Informatics","raw_type":"proceedings-article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":22,"referenced_works":["https://openalex.org/W1577822803","https://openalex.org/W1973094248","https://openalex.org/W2009119978","https://openalex.org/W2010454899","https://openalex.org/W2102841127","https://openalex.org/W2111071896","https://openalex.org/W2111295912","https://openalex.org/W2111307596","https://openalex.org/W2125266506","https://openalex.org/W2134526812","https://openalex.org/W2144560237","https://openalex.org/W2159954944","https://openalex.org/W2438121987","https://openalex.org/W2583363792","https://openalex.org/W2763390627","https://openalex.org/W2897927784","https://openalex.org/W2950150251","https://openalex.org/W2951822379","https://openalex.org/W2999074280","https://openalex.org/W3102863871","https://openalex.org/W3117679888","https://openalex.org/W4237975145"],"related_works":["https://openalex.org/W2120956621","https://openalex.org/W2915362964","https://openalex.org/W3215786367","https://openalex.org/W3205360317","https://openalex.org/W4214573178","https://openalex.org/W2562683361","https://openalex.org/W3200242814","https://openalex.org/W4200364103","https://openalex.org/W4282946044","https://openalex.org/W2340761128"],"abstract_inverted_index":{"High-throughput":[0],"sequencing":[1,8,20,140,182],"techniques":[2,21],"generate":[3],"large":[4,47,199],"volumes":[5],"of":[6,33,85,135,152,161,173,201,214,250,261],"DNA":[7,58,215],"data":[9,37,62],"at":[10],"ultra-fast":[11],"speed":[12],"and":[13,28,39,64,203,227,283],"extremely":[14],"low":[15],"cost.":[16],"As":[17],"a":[18,86,90,99,105,111,127,169,177,189,242,257,278],"consequence,":[19],"have":[22,41,121,130],"become":[23],"ubiquitous":[24],"in":[25,31,60,81,89,138,148,163,180,192,211,221,245,280],"biomedical":[26],"research":[27],"are":[29,76,224],"used":[30],"hundreds":[32],"genomic":[34],"applications.":[35],"Efficient":[36],"structures":[38,63],"algorithms":[40,65],"been":[42,144],"developed":[43],"to":[44,56,98,146,277],"handle":[45],"the":[46,77,93,133,158,164,212,228,237,246,251],"datasets":[48],"produced":[49,266],"by":[50,67,235],"these":[51],"techniques.":[52],"The":[53,219],"prevailing":[54],"method":[55],"index":[57],"sequences":[59,216,220],"those":[61],"is":[66,95,176,188,233],"using":[68,136],"k-mers":[69,79],"(k-long":[70],"substrings)":[71],"known":[72],"as":[73],"minimizers.":[74],"Minimizers":[75],"smallest":[78,94],"selected":[80],"every":[82],"consecutive":[83],"window":[84],"fixed":[87],"length":[88],"sequence,":[91],"where":[92],"determined":[96],"according":[97],"predefined":[100],"order,":[101],"e.g.,":[102],"lexicographic.":[103],"Recently,":[104],"new":[106],"k-mer":[107,153],"order":[108,244,259],"based":[109,125],"on":[110,126],"universal":[112],"hitting":[113],"set":[114],"(UHS)":[115],"was":[116],"suggested.":[117],"While":[118],"several":[119],"studies":[120],"shown":[122],"that":[123],"orders":[124],"small":[128],"UHS":[129],"improved":[131],"properties,":[132],"utility":[134],"them":[137],"high-throughput":[139,181],"analysis":[141],"tasks":[142],"has":[143],"demonstrated":[145],"date":[147],"only":[149],"one":[150],"application":[151],"counting.":[154],"Here,":[155],"we":[156],"demonstrate":[157],"practical":[159],"benefit":[160],"UHSs":[162],"genome":[165,170,193],"assembly":[166],"task.":[167],"Reconstructing":[168],"from":[171],"billions":[172],"short":[174],"reads":[175],"fundamental":[178],"task":[179],"analyses.":[183],"De":[184],"Bruijn":[185,231],"graph":[186,232],"construction":[187],"key":[190],"step":[191,249],"assembly,":[194],"which":[195,275],"often":[196],"requires":[197],"very":[198],"amounts":[200],"memory":[202,284],"long":[204],"computation":[205],"time.":[206],"A":[207],"critical":[208],"bottleneck":[209],"lies":[210],"partitioning":[213],"into":[217],"bins.":[218],"each":[222],"bin":[223,247,273],"assembled":[225],"separately,":[226],"final":[229],"de":[230],"constructed":[234],"merging":[236],"bin-specific":[238],"subgraphs.":[239],"We":[240],"incorporated":[241],"UHS-based":[243,258],"partition":[248],"Minimum":[252],"Substring":[253],"Partitioning":[254],"algorithm.":[255],"Using":[256],"instead":[260],"lexicographic-":[262],"or":[263],"random-ordered":[264],"minimizers":[265,269],"lower":[267],"density":[268],"with":[270],"more":[271],"balanced":[272],"partitioning,":[274],"led":[276],"reduction":[279],"both":[281],"runtime":[282],"usage.":[285]},"counts_by_year":[{"year":2023,"cited_by_count":2},{"year":2022,"cited_by_count":1},{"year":2021,"cited_by_count":1}],"updated_date":"2025-12-10T02:45:41.426853","created_date":"2025-10-10T00:00:00"}
