{"id":"https://openalex.org/W1982633903","doi":"https://doi.org/10.1145/2807591.2807619","title":"A parallel connectivity algorithm for de Bruijn graphs in metagenomic applications","display_name":"A parallel connectivity algorithm for de Bruijn graphs in metagenomic applications","publication_year":2015,"publication_date":"2015-10-27","ids":{"openalex":"https://openalex.org/W1982633903","doi":"https://doi.org/10.1145/2807591.2807619","mag":"1982633903"},"language":"en","primary_location":{"id":"doi:10.1145/2807591.2807619","is_oa":true,"landing_page_url":"https://doi.org/10.1145/2807591.2807619","pdf_url":"http://dl.acm.org/ft_gateway.cfm?id=2807619&type=pdf","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the International Conference for High Performance Computing, Networking, Storage and Analysis","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"http://dl.acm.org/ft_gateway.cfm?id=2807619&type=pdf","any_repository_has_fulltext":null},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5023547296","display_name":"Patrick Flick","orcid":"https://orcid.org/0000-0003-3343-2398"},"institutions":[{"id":"https://openalex.org/I130701444","display_name":"Georgia Institute of Technology","ror":"https://ror.org/01zkghx44","country_code":"US","type":"education","lineage":["https://openalex.org/I130701444"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Patrick Flick","raw_affiliation_strings":["Georgia Institute of Technology, Atlanta, Georgia","Georgia Institute of Technology , Atlanta, Georgia"],"affiliations":[{"raw_affiliation_string":"Georgia Institute of Technology, Atlanta, Georgia","institution_ids":["https://openalex.org/I130701444"]},{"raw_affiliation_string":"Georgia Institute of Technology , Atlanta, Georgia","institution_ids":["https://openalex.org/I130701444"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5102941841","display_name":"Chirag Jain","orcid":"https://orcid.org/0000-0001-7837-2128"},"institutions":[{"id":"https://openalex.org/I130701444","display_name":"Georgia Institute of Technology","ror":"https://ror.org/01zkghx44","country_code":"US","type":"education","lineage":["https://openalex.org/I130701444"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Chirag Jain","raw_affiliation_strings":["Georgia Institute of Technology, Atlanta, Georgia","Georgia Institute of Technology , Atlanta, Georgia"],"affiliations":[{"raw_affiliation_string":"Georgia Institute of Technology, Atlanta, Georgia","institution_ids":["https://openalex.org/I130701444"]},{"raw_affiliation_string":"Georgia Institute of Technology , Atlanta, Georgia","institution_ids":["https://openalex.org/I130701444"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5105460573","display_name":"Tony Pan","orcid":"https://orcid.org/0000-0001-7945-6534"},"institutions":[{"id":"https://openalex.org/I130701444","display_name":"Georgia Institute of Technology","ror":"https://ror.org/01zkghx44","country_code":"US","type":"education","lineage":["https://openalex.org/I130701444"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Tony Pan","raw_affiliation_strings":["Georgia Institute of Technology, Atlanta, Georgia","Georgia Institute of Technology , Atlanta, Georgia"],"affiliations":[{"raw_affiliation_string":"Georgia Institute of Technology, Atlanta, Georgia","institution_ids":["https://openalex.org/I130701444"]},{"raw_affiliation_string":"Georgia Institute of Technology , Atlanta, Georgia","institution_ids":["https://openalex.org/I130701444"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5074347913","display_name":"Srinivas Aluru","orcid":"https://orcid.org/0000-0003-4279-469X"},"institutions":[{"id":"https://openalex.org/I130701444","display_name":"Georgia Institute of Technology","ror":"https://ror.org/01zkghx44","country_code":"US","type":"education","lineage":["https://openalex.org/I130701444"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Srinivas Aluru","raw_affiliation_strings":["Georgia Institute of Technology, Atlanta, Georgia","Georgia Institute of Technology , Atlanta, Georgia"],"affiliations":[{"raw_affiliation_string":"Georgia Institute of Technology, Atlanta, Georgia","institution_ids":["https://openalex.org/I130701444"]},{"raw_affiliation_string":"Georgia Institute of Technology , Atlanta, Georgia","institution_ids":["https://openalex.org/I130701444"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5023547296"],"corresponding_institution_ids":["https://openalex.org/I130701444"],"apc_list":null,"apc_paid":null,"fwci":3.1691,"has_fulltext":true,"cited_by_count":25,"citation_normalized_percentile":{"value":0.9214467,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":89,"max":100},"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"11"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10015","display_name":"Genomics and Phylogenetic Studies","score":0.9976000189781189,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},"topics":[{"id":"https://openalex.org/T10015","display_name":"Genomics and Phylogenetic Studies","score":0.9976000189781189,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},{"id":"https://openalex.org/T10887","display_name":"Bioinformatics and Genomic Networks","score":0.9908999800682068,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},{"id":"https://openalex.org/T10885","display_name":"Gene expression and cancer classification","score":0.9847999811172485,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/de-bruijn-sequence","display_name":"De Bruijn sequence","score":0.8711060285568237},{"id":"https://openalex.org/keywords/de-bruijn-graph","display_name":"De Bruijn graph","score":0.7706925868988037},{"id":"https://openalex.org/keywords/metagenomics","display_name":"Metagenomics","score":0.6800686717033386},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6689770221710205},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.40826112031936646},{"id":"https://openalex.org/keywords/theoretical-computer-science","display_name":"Theoretical computer science","score":0.3468285799026489},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.21532949805259705},{"id":"https://openalex.org/keywords/combinatorics","display_name":"Combinatorics","score":0.1930445432662964},{"id":"https://openalex.org/keywords/biology","display_name":"Biology","score":0.12886402010917664}],"concepts":[{"id":"https://openalex.org/C170320093","wikidata":"https://www.wikidata.org/wiki/Q1953457","display_name":"De Bruijn sequence","level":2,"score":0.8711060285568237},{"id":"https://openalex.org/C20218877","wikidata":"https://www.wikidata.org/wiki/Q3066095","display_name":"De Bruijn graph","level":3,"score":0.7706925868988037},{"id":"https://openalex.org/C15151743","wikidata":"https://www.wikidata.org/wiki/Q903778","display_name":"Metagenomics","level":3,"score":0.6800686717033386},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6689770221710205},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.40826112031936646},{"id":"https://openalex.org/C80444323","wikidata":"https://www.wikidata.org/wiki/Q2878974","display_name":"Theoretical computer science","level":1,"score":0.3468285799026489},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.21532949805259705},{"id":"https://openalex.org/C114614502","wikidata":"https://www.wikidata.org/wiki/Q76592","display_name":"Combinatorics","level":1,"score":0.1930445432662964},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.12886402010917664},{"id":"https://openalex.org/C104317684","wikidata":"https://www.wikidata.org/wiki/Q7187","display_name":"Gene","level":2,"score":0.0},{"id":"https://openalex.org/C55493867","wikidata":"https://www.wikidata.org/wiki/Q7094","display_name":"Biochemistry","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/2807591.2807619","is_oa":true,"landing_page_url":"https://doi.org/10.1145/2807591.2807619","pdf_url":"http://dl.acm.org/ft_gateway.cfm?id=2807619&type=pdf","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the International Conference for High Performance Computing, Networking, Storage and Analysis","raw_type":"proceedings-article"}],"best_oa_location":{"id":"doi:10.1145/2807591.2807619","is_oa":true,"landing_page_url":"https://doi.org/10.1145/2807591.2807619","pdf_url":"http://dl.acm.org/ft_gateway.cfm?id=2807619&type=pdf","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the International Conference for High Performance Computing, Networking, Storage and Analysis","raw_type":"proceedings-article"},"sustainable_development_goals":[{"display_name":"Life in Land","id":"https://metadata.un.org/sdg/15","score":0.7300000190734863}],"awards":[{"id":"https://openalex.org/G1683691965","display_name":null,"funder_award_id":"1229081","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"},{"id":"https://openalex.org/G1795292305","display_name":null,"funder_award_id":"CNS-1229081","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"},{"id":"https://openalex.org/G2567864256","display_name":null,"funder_award_id":"CCF-1360593","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"},{"id":"https://openalex.org/G2766341901","display_name":"BIGDATA: Mid-Scale: DA: Collaborative Research: Genomes Galore - Core Techniques, Libraries, and Domain Specific Languages for High-Throughput DNA Sequencing","funder_award_id":"1416259","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"},{"id":"https://openalex.org/G4078666295","display_name":"AF: Medium: Parallel Algorithms and Software for High-Throughput Sequence Assembly","funder_award_id":"1360593","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"},{"id":"https://openalex.org/G7469405548","display_name":null,"funder_award_id":"IIS-1416259","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"},{"id":"https://openalex.org/G7897703779","display_name":null,"funder_award_id":"CCF-1360593, IIS-1416259","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"},{"id":"https://openalex.org/G848032724","display_name":null,"funder_award_id":"Science","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"}],"funders":[{"id":"https://openalex.org/F4320306076","display_name":"National Science Foundation","ror":"https://ror.org/021nxhr62"}],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W1982633903.pdf","grobid_xml":"https://content.openalex.org/works/W1982633903.grobid-xml"},"referenced_works_count":26,"referenced_works":["https://openalex.org/W78403084","https://openalex.org/W92301696","https://openalex.org/W1557692423","https://openalex.org/W1569498258","https://openalex.org/W1849362181","https://openalex.org/W1957208641","https://openalex.org/W1989437487","https://openalex.org/W1989741444","https://openalex.org/W2004350101","https://openalex.org/W2010002522","https://openalex.org/W2013081353","https://openalex.org/W2018481625","https://openalex.org/W2039531605","https://openalex.org/W2047345171","https://openalex.org/W2057839002","https://openalex.org/W2081538566","https://openalex.org/W2086825382","https://openalex.org/W2105067980","https://openalex.org/W2109848222","https://openalex.org/W2113915152","https://openalex.org/W2120925698","https://openalex.org/W2140175819","https://openalex.org/W2154111453","https://openalex.org/W2159591897","https://openalex.org/W2160969485","https://openalex.org/W2163830511"],"related_works":["https://openalex.org/W2562683361","https://openalex.org/W3087469195","https://openalex.org/W2354744388","https://openalex.org/W3125399386","https://openalex.org/W2949121831","https://openalex.org/W3215786367","https://openalex.org/W2122316710","https://openalex.org/W4301429973","https://openalex.org/W2951695076","https://openalex.org/W1964377951"],"abstract_inverted_index":{"Dramatic":[0],"advances":[1],"in":[2,77,147],"DNA":[3,19],"sequencing":[4,16],"technology":[5],"have":[6],"made":[7],"it":[8],"possible":[9],"to":[10,23,42,89,97,143],"study":[11],"microbial":[12],"environments":[13],"by":[14],"direct":[15],"of":[17,72,106,123],"environmental":[18],"samples.":[20],"Yet,":[21],"due":[22],"the":[24,51,57,63,78,91,99,104],"huge":[25],"volume":[26],"and":[27,94],"high":[28],"data":[29],"complexity,":[30],"current":[31],"de":[32,79],"novo":[33],"assemblers":[34],"cannot":[35],"handle":[36],"large":[37],"metagenomic":[38,58],"datasets":[39],"or":[40],"fail":[41],"perform":[43],"assembly":[44,59],"with":[45,114],"acceptable":[46],"quality.":[47,65],"This":[48],"paper":[49],"presents":[50],"first":[52],"parallel":[53],"solution":[54,140],"for":[55,131],"decomposing":[56],"problem":[60,69],"without":[61],"compromising":[62],"post-assembly":[64],"We":[66,82,102],"transform":[67],"this":[68],"into":[70],"that":[71],"finding":[73,144],"weakly":[74],"connected":[75,92,145],"components":[76,146],"Bruijn":[80],"graph.":[81],"propose":[83],"a":[84,110,121,132],"novel":[85],"distributed":[86],"memory":[87],"algorithm":[88,108],"identify":[90],"subgraphs,":[93],"present":[95],"strategies":[96],"minimize":[98],"communication":[100],"volume.":[101],"demonstrate":[103],"scalability":[105],"our":[107,139],"on":[109],"soil":[111],"metagenome":[112],"dataset":[113],"1.8":[115],"billion":[116],"reads.":[117],"Our":[118],"approach":[119],"achieves":[120],"runtime":[122],"22":[124],"minutes":[125],"using":[126],"1280":[127],"Intel":[128],"Xeon":[129],"cores":[130],"421":[133],"GB":[134],"uncompressed":[135],"FASTQ":[136],"dataset.":[137],"Moreover,":[138],"is":[141],"generalizable":[142],"arbitrary":[148],"undirected":[149],"graphs.":[150]},"counts_by_year":[{"year":2025,"cited_by_count":1},{"year":2022,"cited_by_count":2},{"year":2020,"cited_by_count":1},{"year":2018,"cited_by_count":6},{"year":2017,"cited_by_count":13},{"year":2016,"cited_by_count":2}],"updated_date":"2026-04-10T15:06:20.359241","created_date":"2025-10-10T00:00:00"}
