{"id":"https://openalex.org/W2748933472","doi":"https://doi.org/10.1145/3107411.3107482","title":"Scalable Genomic Assembly through Parallel <i>de Bruijn</i> Graph Construction for Multiple K-mers","display_name":"Scalable Genomic Assembly through Parallel <i>de Bruijn</i> Graph Construction for Multiple K-mers","publication_year":2017,"publication_date":"2017-08-20","ids":{"openalex":"https://openalex.org/W2748933472","doi":"https://doi.org/10.1145/3107411.3107482","mag":"2748933472"},"language":"en","primary_location":{"id":"doi:10.1145/3107411.3107482","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3107411.3107482","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 8th ACM International Conference on Bioinformatics, Computational Biology,and Health Informatics","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5073589054","display_name":"Kanak Mahadik","orcid":"https://orcid.org/0000-0002-6780-4199"},"institutions":[{"id":"https://openalex.org/I219193219","display_name":"Purdue University West Lafayette","ror":"https://ror.org/02dqehb95","country_code":"US","type":"education","lineage":["https://openalex.org/I219193219"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Kanak Mahadik","raw_affiliation_strings":["Purdue University, West Lafayette, IN, USA"],"affiliations":[{"raw_affiliation_string":"Purdue University, West Lafayette, IN, USA","institution_ids":["https://openalex.org/I219193219"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5016556843","display_name":"Christopher Wright","orcid":"https://orcid.org/0000-0002-2592-0407"},"institutions":[{"id":"https://openalex.org/I219193219","display_name":"Purdue University West Lafayette","ror":"https://ror.org/02dqehb95","country_code":"US","type":"education","lineage":["https://openalex.org/I219193219"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Christopher Wright","raw_affiliation_strings":["Purdue University, West Lafayette, IN, USA"],"affiliations":[{"raw_affiliation_string":"Purdue University, West Lafayette, IN, USA","institution_ids":["https://openalex.org/I219193219"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5075916086","display_name":"Milind Kulkarni","orcid":"https://orcid.org/0000-0001-6827-345X"},"institutions":[{"id":"https://openalex.org/I219193219","display_name":"Purdue University West Lafayette","ror":"https://ror.org/02dqehb95","country_code":"US","type":"education","lineage":["https://openalex.org/I219193219"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Milind Kulkarni","raw_affiliation_strings":["Purdue University, West Lafayette, IN, USA"],"affiliations":[{"raw_affiliation_string":"Purdue University, West Lafayette, IN, USA","institution_ids":["https://openalex.org/I219193219"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5047310442","display_name":"Saurabh Bagchi","orcid":"https://orcid.org/0000-0002-4239-5632"},"institutions":[{"id":"https://openalex.org/I219193219","display_name":"Purdue University West Lafayette","ror":"https://ror.org/02dqehb95","country_code":"US","type":"education","lineage":["https://openalex.org/I219193219"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Saurabh Bagchi","raw_affiliation_strings":["Purdue University, West Lafayette, IN, USA"],"affiliations":[{"raw_affiliation_string":"Purdue University, West Lafayette, IN, USA","institution_ids":["https://openalex.org/I219193219"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5055585728","display_name":"Somali Chaterji","orcid":"https://orcid.org/0000-0002-3651-6362"},"institutions":[{"id":"https://openalex.org/I219193219","display_name":"Purdue University West Lafayette","ror":"https://ror.org/02dqehb95","country_code":"US","type":"education","lineage":["https://openalex.org/I219193219"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Somali Chaterji","raw_affiliation_strings":["Purdue Unviersity, West Lafayatte, IN, USA"],"affiliations":[{"raw_affiliation_string":"Purdue Unviersity, West Lafayatte, IN, USA","institution_ids":["https://openalex.org/I219193219"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5073589054"],"corresponding_institution_ids":["https://openalex.org/I219193219"],"apc_list":null,"apc_paid":null,"fwci":0.8507,"has_fulltext":false,"cited_by_count":9,"citation_normalized_percentile":{"value":0.72293416,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":98},"biblio":{"volume":null,"issue":null,"first_page":"425","last_page":"431"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10015","display_name":"Genomics and Phylogenetic Studies","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},"topics":[{"id":"https://openalex.org/T10015","display_name":"Genomics and Phylogenetic Studies","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},{"id":"https://openalex.org/T10434","display_name":"Chromosomal and Genetic Variations","score":0.9944999814033508,"subfield":{"id":"https://openalex.org/subfields/1110","display_name":"Plant Science"},"field":{"id":"https://openalex.org/fields/11","display_name":"Agricultural and Biological Sciences"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},{"id":"https://openalex.org/T11048","display_name":"Bacteriophages and microbial interactions","score":0.9876999855041504,"subfield":{"id":"https://openalex.org/subfields/2303","display_name":"Ecology"},"field":{"id":"https://openalex.org/fields/23","display_name":"Environmental Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/de-bruijn-graph","display_name":"De Bruijn graph","score":0.8355912566184998},{"id":"https://openalex.org/keywords/scalability","display_name":"Scalability","score":0.6625651717185974},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6586825251579285},{"id":"https://openalex.org/keywords/bottleneck","display_name":"Bottleneck","score":0.654491126537323},{"id":"https://openalex.org/keywords/sequence-assembly","display_name":"Sequence assembly","score":0.4884735643863678},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.47980692982673645},{"id":"https://openalex.org/keywords/graph","display_name":"Graph","score":0.47973623871803284},{"id":"https://openalex.org/keywords/genome","display_name":"Genome","score":0.4676472842693329},{"id":"https://openalex.org/keywords/de-bruijn-sequence","display_name":"De Bruijn sequence","score":0.46400776505470276},{"id":"https://openalex.org/keywords/theoretical-computer-science","display_name":"Theoretical computer science","score":0.4603329598903656},{"id":"https://openalex.org/keywords/leverage","display_name":"Leverage (statistics)","score":0.4551621973514557},{"id":"https://openalex.org/keywords/biology","display_name":"Biology","score":0.23367956280708313},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.16747382283210754},{"id":"https://openalex.org/keywords/combinatorics","display_name":"Combinatorics","score":0.14306944608688354},{"id":"https://openalex.org/keywords/genetics","display_name":"Genetics","score":0.10677093267440796},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.10097259283065796},{"id":"https://openalex.org/keywords/gene","display_name":"Gene","score":0.09596413373947144}],"concepts":[{"id":"https://openalex.org/C20218877","wikidata":"https://www.wikidata.org/wiki/Q3066095","display_name":"De Bruijn graph","level":3,"score":0.8355912566184998},{"id":"https://openalex.org/C48044578","wikidata":"https://www.wikidata.org/wiki/Q727490","display_name":"Scalability","level":2,"score":0.6625651717185974},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6586825251579285},{"id":"https://openalex.org/C2780513914","wikidata":"https://www.wikidata.org/wiki/Q18210350","display_name":"Bottleneck","level":2,"score":0.654491126537323},{"id":"https://openalex.org/C18949551","wikidata":"https://www.wikidata.org/wiki/Q740578","display_name":"Sequence assembly","level":5,"score":0.4884735643863678},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.47980692982673645},{"id":"https://openalex.org/C132525143","wikidata":"https://www.wikidata.org/wiki/Q141488","display_name":"Graph","level":2,"score":0.47973623871803284},{"id":"https://openalex.org/C141231307","wikidata":"https://www.wikidata.org/wiki/Q7020","display_name":"Genome","level":3,"score":0.4676472842693329},{"id":"https://openalex.org/C170320093","wikidata":"https://www.wikidata.org/wiki/Q1953457","display_name":"De Bruijn sequence","level":2,"score":0.46400776505470276},{"id":"https://openalex.org/C80444323","wikidata":"https://www.wikidata.org/wiki/Q2878974","display_name":"Theoretical computer science","level":1,"score":0.4603329598903656},{"id":"https://openalex.org/C153083717","wikidata":"https://www.wikidata.org/wiki/Q6535263","display_name":"Leverage (statistics)","level":2,"score":0.4551621973514557},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.23367956280708313},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.16747382283210754},{"id":"https://openalex.org/C114614502","wikidata":"https://www.wikidata.org/wiki/Q76592","display_name":"Combinatorics","level":1,"score":0.14306944608688354},{"id":"https://openalex.org/C54355233","wikidata":"https://www.wikidata.org/wiki/Q7162","display_name":"Genetics","level":1,"score":0.10677093267440796},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.10097259283065796},{"id":"https://openalex.org/C104317684","wikidata":"https://www.wikidata.org/wiki/Q7187","display_name":"Gene","level":2,"score":0.09596413373947144},{"id":"https://openalex.org/C77088390","wikidata":"https://www.wikidata.org/wiki/Q8513","display_name":"Database","level":1,"score":0.0},{"id":"https://openalex.org/C162317418","wikidata":"https://www.wikidata.org/wiki/Q252857","display_name":"Transcriptome","level":4,"score":0.0},{"id":"https://openalex.org/C150194340","wikidata":"https://www.wikidata.org/wiki/Q26972","display_name":"Gene expression","level":3,"score":0.0},{"id":"https://openalex.org/C149635348","wikidata":"https://www.wikidata.org/wiki/Q193040","display_name":"Embedded system","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3107411.3107482","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3107411.3107482","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 8th ACM International Conference on Bioinformatics, Computational Biology,and Health Informatics","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G7027099446","display_name":null,"funder_award_id":"CCF-1337158,CCF-1150013","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"}],"funders":[{"id":"https://openalex.org/F4320306076","display_name":"National Science Foundation","ror":"https://ror.org/021nxhr62"}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":16,"referenced_works":["https://openalex.org/W1598134722","https://openalex.org/W1966822396","https://openalex.org/W2005202728","https://openalex.org/W2022986961","https://openalex.org/W2033292629","https://openalex.org/W2041391522","https://openalex.org/W2084134542","https://openalex.org/W2104846587","https://openalex.org/W2109848222","https://openalex.org/W2113915152","https://openalex.org/W2115501618","https://openalex.org/W2120902911","https://openalex.org/W2129933858","https://openalex.org/W2141920662","https://openalex.org/W2160969485","https://openalex.org/W2773939681"],"related_works":["https://openalex.org/W2562683361","https://openalex.org/W3087469195","https://openalex.org/W2354744388","https://openalex.org/W3125399386","https://openalex.org/W2949121831","https://openalex.org/W3215786367","https://openalex.org/W2991626973","https://openalex.org/W4387083702","https://openalex.org/W4318940746","https://openalex.org/W2133531097"],"abstract_inverted_index":{"Extraordinary":[0],"progress":[1],"in":[2,12,72,145,199],"genome":[3,198],"sequencing":[4],"technologies":[5],"has":[6],"led":[7],"to":[8,26,34,48,68,106,136,217],"a":[9,23,108,131,149,178],"tremendous":[10],"increase":[11],"the":[13,35,50,82,85,103,124,153,156,167,170,195,204,208,220],"number":[14],"of":[15,37,84,102,127,133,180,222,232],"sequenced":[16],"genomes.":[17],"However,":[18,75],"biologists":[19],"have":[20],"run":[21],"into":[22],"computational":[24],"bottleneck":[25],"assemble":[27,107,185],"large":[28,69],"and":[29,39,144,231],"complex":[30,186,197],"genomes":[31,187],"quickly,":[32],"due":[33],"lack":[36],"scalable":[38],"parallel":[40],"de":[41,52],"novo":[42],"assembly":[43],"algorithms.":[44],"Among":[45],"several":[46],"approaches":[47],"assembly,":[49],"iterative":[51],"Bruijn":[53],"graph":[54,73,98,140,154,168],"(DBG)":[55],"assemblers,":[56],"such":[57],"as":[58],"IDBA-UD,":[59],"generate":[60],"high-quality":[61],"assemblies":[62],"by":[63,226,235],"sequentially":[64],"iterating":[65],"from":[66,166],"small":[67],"k-values":[70,89],"used":[71],"construction.":[74],"this":[76,117],"approach":[77],"is":[78,141],"time":[79,105],"intensive":[80],"because":[81],"creation":[83],"graphs":[86],"for":[87,130,155,177,194,207],"increasing":[88],"proceeds":[90],"sequentially.":[91],"For":[92],"example,":[93],"with":[94,111,163,169],"just":[95],"eight":[96],"k-values,":[97],"construction":[99,129],"takes":[100],"96%":[101],"total":[104],"metagenomic":[109],"dataset":[110],"33":[112],"million":[113],"paired-end":[114],"reads.":[115],"In":[116],"paper,":[118],"we":[119],"propose":[120],"ScalaDBG,":[121],"which":[122],"transforms":[123],"sequential":[125],"process":[126],"DBG":[128],"range":[132],"k":[134,158,172],"values,":[135],"one":[137],"where":[138],"each":[139],"built":[142],"independently":[143],"parallel.":[146],"We":[147,174],"develop":[148],"novel":[150],"mechanism":[151],"whereby":[152],"higher":[157],"value":[159],"can":[160,184],"be":[161],"\"patched\"":[162],"contigs":[164],"generated":[165],"lower":[171],"value.":[173],"show":[175],"that":[176],"variety":[179],"datasets":[181],"our":[182,200],"technique":[183],"much":[188],"faster":[189,193],"than":[190],"IDBA-UD":[191],"(6.7X":[192],"most":[196],"dataset)":[201],"while":[202],"maintaining":[203],"same":[205],"accuracy":[206],"assembled":[209],"genome.":[210],"Moreover,":[211],"ScalaDBG's":[212],"multi-level":[213],"parallelism":[214],"allows":[215],"it":[216],"simultaneously":[218],"leverage":[219],"power":[221],"mighty":[223],"server":[224],"machines":[225],"using":[227],"all":[228],"their":[229],"cores":[230],"compute":[233],"clusters":[234],"scaling":[236],"out.":[237]},"counts_by_year":[{"year":2023,"cited_by_count":1},{"year":2019,"cited_by_count":5},{"year":2018,"cited_by_count":2},{"year":2017,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
