{"id":"https://openalex.org/W2914655408","doi":"https://doi.org/10.1109/bibm.2018.8621546","title":"SORA: Scalable Overlap-graph Reduction Algorithms for Genome Assembly using Apache Spark in the Cloud","display_name":"SORA: Scalable Overlap-graph Reduction Algorithms for Genome Assembly using Apache Spark in the Cloud","publication_year":2018,"publication_date":"2018-12-01","ids":{"openalex":"https://openalex.org/W2914655408","doi":"https://doi.org/10.1109/bibm.2018.8621546","mag":"2914655408"},"language":"en","primary_location":{"id":"doi:10.1109/bibm.2018.8621546","is_oa":false,"landing_page_url":"https://doi.org/10.1109/bibm.2018.8621546","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2018 IEEE International Conference on Bioinformatics and Biomedicine (BIBM)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://www.osti.gov/biblio/1557475","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5103953329","display_name":"Alexander J. Paul","orcid":null},"institutions":[{"id":"https://openalex.org/I47838141","display_name":"Saint Louis University","ror":"https://ror.org/01p7jjy08","country_code":"US","type":"education","lineage":["https://openalex.org/I47838141"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Alexander J. Paul","raw_affiliation_strings":["Bioinformatics and Computational Biology Program, Saint Louis University, St. Louis, MO, US"],"affiliations":[{"raw_affiliation_string":"Bioinformatics and Computational Biology Program, Saint Louis University, St. Louis, MO, US","institution_ids":["https://openalex.org/I47838141"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5025681662","display_name":"Dylan Lawrence","orcid":"https://orcid.org/0000-0001-6919-8308"},"institutions":[{"id":"https://openalex.org/I204465549","display_name":"Washington University in St. Louis","ror":"https://ror.org/01yc7t268","country_code":"US","type":"education","lineage":["https://openalex.org/I204465549"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Dylan Lawrence","raw_affiliation_strings":["Computational and Systems Biology Program, Washington University in St. Louis, St. Louis, MO, US"],"affiliations":[{"raw_affiliation_string":"Computational and Systems Biology Program, Washington University in St. Louis, St. Louis, MO, US","institution_ids":["https://openalex.org/I204465549"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5035020782","display_name":"Myoungkyu Song","orcid":"https://orcid.org/0000-0003-4477-8933"},"institutions":[{"id":"https://openalex.org/I122266389","display_name":"University of Nebraska at Omaha","ror":"https://ror.org/04yrkc140","country_code":"US","type":"education","lineage":["https://openalex.org/I122266389"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Myoungkyu Song","raw_affiliation_strings":["Department of Computer Science, University of Nebraska at Omaha, Omaha, NE, US"],"affiliations":[{"raw_affiliation_string":"Department of Computer Science, University of Nebraska at Omaha, Omaha, NE, US","institution_ids":["https://openalex.org/I122266389"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5029824262","display_name":"Seung\u2013Hwan Lim","orcid":"https://orcid.org/0000-0001-9461-6866"},"institutions":[{"id":"https://openalex.org/I1289243028","display_name":"Oak Ridge National Laboratory","ror":"https://ror.org/01qz5mb56","country_code":"US","type":"facility","lineage":["https://openalex.org/I1289243028","https://openalex.org/I1330989302","https://openalex.org/I39565521","https://openalex.org/I4210159294"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Seung-Hwan Lim","raw_affiliation_strings":["Computer Science and Mathematics Division, Oak Ridge National Laboratory, Oak Ridge, TN, US"],"affiliations":[{"raw_affiliation_string":"Computer Science and Mathematics Division, Oak Ridge National Laboratory, Oak Ridge, TN, US","institution_ids":["https://openalex.org/I1289243028"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5035236332","display_name":"Chongle Pan","orcid":"https://orcid.org/0000-0003-2860-0334"},"institutions":[{"id":"https://openalex.org/I8692664","display_name":"University of Oklahoma","ror":"https://ror.org/02aqsxs83","country_code":"US","type":"education","lineage":["https://openalex.org/I8692664"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Chongle Pan","raw_affiliation_strings":["School of Computer Science, University of Oklahoma, Norman, OK, US"],"affiliations":[{"raw_affiliation_string":"School of Computer Science, University of Oklahoma, Norman, OK, US","institution_ids":["https://openalex.org/I8692664"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5047054370","display_name":"Tae-Hyuk Ahn","orcid":"https://orcid.org/0000-0002-7281-9459"},"institutions":[{"id":"https://openalex.org/I47838141","display_name":"Saint Louis University","ror":"https://ror.org/01p7jjy08","country_code":"US","type":"education","lineage":["https://openalex.org/I47838141"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Tae-Hyuk Ahn","raw_affiliation_strings":["Department of Computer Science, Saint Louis University, St. Louis, MO, US"],"affiliations":[{"raw_affiliation_string":"Department of Computer Science, Saint Louis University, St. Louis, MO, US","institution_ids":["https://openalex.org/I47838141"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5103953329"],"corresponding_institution_ids":["https://openalex.org/I47838141"],"apc_list":null,"apc_paid":null,"fwci":0.2784,"has_fulltext":false,"cited_by_count":4,"citation_normalized_percentile":{"value":0.58692001,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":94},"biblio":{"volume":"323","issue":null,"first_page":"718","last_page":"723"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10015","display_name":"Genomics and Phylogenetic Studies","score":0.9986000061035156,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},"topics":[{"id":"https://openalex.org/T10015","display_name":"Genomics and Phylogenetic Studies","score":0.9986000061035156,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},{"id":"https://openalex.org/T10878","display_name":"CRISPR and Genetic Engineering","score":0.9787999987602234,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},{"id":"https://openalex.org/T10434","display_name":"Chromosomal and Genetic Variations","score":0.977400004863739,"subfield":{"id":"https://openalex.org/subfields/1110","display_name":"Plant Science"},"field":{"id":"https://openalex.org/fields/11","display_name":"Agricultural and Biological Sciences"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7752723693847656},{"id":"https://openalex.org/keywords/scalability","display_name":"Scalability","score":0.7324533462524414},{"id":"https://openalex.org/keywords/cloud-computing","display_name":"Cloud computing","score":0.6419729590415955},{"id":"https://openalex.org/keywords/spark","display_name":"SPARK (programming language)","score":0.467781662940979},{"id":"https://openalex.org/keywords/sequence-assembly","display_name":"Sequence assembly","score":0.46656715869903564},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.45415109395980835},{"id":"https://openalex.org/keywords/graph","display_name":"Graph","score":0.4319906234741211},{"id":"https://openalex.org/keywords/theoretical-computer-science","display_name":"Theoretical computer science","score":0.39610376954078674},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.3767886161804199},{"id":"https://openalex.org/keywords/distributed-computing","display_name":"Distributed computing","score":0.3538936376571655},{"id":"https://openalex.org/keywords/operating-system","display_name":"Operating system","score":0.16419139504432678},{"id":"https://openalex.org/keywords/biology","display_name":"Biology","score":0.11870896816253662}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7752723693847656},{"id":"https://openalex.org/C48044578","wikidata":"https://www.wikidata.org/wiki/Q727490","display_name":"Scalability","level":2,"score":0.7324533462524414},{"id":"https://openalex.org/C79974875","wikidata":"https://www.wikidata.org/wiki/Q483639","display_name":"Cloud computing","level":2,"score":0.6419729590415955},{"id":"https://openalex.org/C2781215313","wikidata":"https://www.wikidata.org/wiki/Q3493345","display_name":"SPARK (programming language)","level":2,"score":0.467781662940979},{"id":"https://openalex.org/C18949551","wikidata":"https://www.wikidata.org/wiki/Q740578","display_name":"Sequence assembly","level":5,"score":0.46656715869903564},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.45415109395980835},{"id":"https://openalex.org/C132525143","wikidata":"https://www.wikidata.org/wiki/Q141488","display_name":"Graph","level":2,"score":0.4319906234741211},{"id":"https://openalex.org/C80444323","wikidata":"https://www.wikidata.org/wiki/Q2878974","display_name":"Theoretical computer science","level":1,"score":0.39610376954078674},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.3767886161804199},{"id":"https://openalex.org/C120314980","wikidata":"https://www.wikidata.org/wiki/Q180634","display_name":"Distributed computing","level":1,"score":0.3538936376571655},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.16419139504432678},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.11870896816253662},{"id":"https://openalex.org/C55493867","wikidata":"https://www.wikidata.org/wiki/Q7094","display_name":"Biochemistry","level":1,"score":0.0},{"id":"https://openalex.org/C104317684","wikidata":"https://www.wikidata.org/wiki/Q7187","display_name":"Gene","level":2,"score":0.0},{"id":"https://openalex.org/C150194340","wikidata":"https://www.wikidata.org/wiki/Q26972","display_name":"Gene expression","level":3,"score":0.0},{"id":"https://openalex.org/C162317418","wikidata":"https://www.wikidata.org/wiki/Q252857","display_name":"Transcriptome","level":4,"score":0.0},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1109/bibm.2018.8621546","is_oa":false,"landing_page_url":"https://doi.org/10.1109/bibm.2018.8621546","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2018 IEEE International Conference on Bioinformatics and Biomedicine (BIBM)","raw_type":"proceedings-article"},{"id":"pmh:oai:osti.gov:1557475","is_oa":true,"landing_page_url":"https://www.osti.gov/biblio/1557475","pdf_url":null,"source":{"id":"https://openalex.org/S4306402487","display_name":"OSTI OAI (U.S. Department of Energy Office of Scientific and Technical Information)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I139351228","host_organization_name":"Office of Scientific and Technical Information","host_organization_lineage":["https://openalex.org/I139351228"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":null}],"best_oa_location":{"id":"pmh:oai:osti.gov:1557475","is_oa":true,"landing_page_url":"https://www.osti.gov/biblio/1557475","pdf_url":null,"source":{"id":"https://openalex.org/S4306402487","display_name":"OSTI OAI (U.S. Department of Energy Office of Scientific and Technical Information)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I139351228","host_organization_name":"Office of Scientific and Technical Information","host_organization_lineage":["https://openalex.org/I139351228"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":null},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":25,"referenced_works":["https://openalex.org/W1579534339","https://openalex.org/W1635391495","https://openalex.org/W1905818355","https://openalex.org/W1966822396","https://openalex.org/W1972924519","https://openalex.org/W1988044297","https://openalex.org/W1992550017","https://openalex.org/W2033292629","https://openalex.org/W2037953066","https://openalex.org/W2058839892","https://openalex.org/W2114133992","https://openalex.org/W2118526609","https://openalex.org/W2125153875","https://openalex.org/W2128114769","https://openalex.org/W2131975293","https://openalex.org/W2132341951","https://openalex.org/W2140437680","https://openalex.org/W2156104322","https://openalex.org/W2160969485","https://openalex.org/W2161546116","https://openalex.org/W2189465200","https://openalex.org/W2213505663","https://openalex.org/W2522603484","https://openalex.org/W2950354111","https://openalex.org/W6687322159"],"related_works":["https://openalex.org/W4244478748","https://openalex.org/W1975949872","https://openalex.org/W3159871278","https://openalex.org/W2230552005","https://openalex.org/W4223488648","https://openalex.org/W2905242764","https://openalex.org/W2134969820","https://openalex.org/W4308507533","https://openalex.org/W2407107767","https://openalex.org/W4401278057"],"abstract_inverted_index":{"The":[0,104],"advent":[1],"of":[2,15,21,25,94,146],"high-throughput":[3],"DNA":[4],"sequencing":[5],"techniques":[6],"has":[7],"permitted":[8],"very":[9,98],"high":[10],"quality":[11],"de":[12,39],"novo":[13,40],"assemblies":[14],"genomes,":[16],"but":[17],"raise":[18],"an":[19],"issue":[20],"requiring":[22],"large":[23,31,99],"amounts":[24],"computer":[26],"memory":[27],"to":[28,96],"resolve":[29],"the":[30,64,80,102,150],"genome":[32,65],"graphs":[33,127],"generated":[34],"by":[35],"most":[36],"overlap":[37],"graph":[38,60,117],"assemblers.":[41],"To":[42,72],"address":[43],"these":[44],"limitations,":[45],"we":[46],"present":[47],"a":[48,68,88,112,119,129,133],"novel":[49],"algorithmic":[50],"approach;":[51],"Scalable":[52],"Overlap-graph":[53],"Reduction":[54],"Algorithms":[55],"(SORA).":[56],"SORA":[57,82,109,152],"adapts":[58],"string":[59],"reduction":[61],"algorithms":[62,139],"for":[63,76,156],"assembly":[66],"using":[67],"distributed":[69,120],"computing":[70],"platform.":[71],"efficiently":[73],"compute":[74],"coverage":[75],"enormous":[77],"paths":[78],"in":[79,101,118,149],"graphs,":[81],"uses":[83],"Apache":[84],"Spark":[85],"which":[86],"is":[87,153],"cluster-based":[89],"engine":[90],"designed":[91],"on":[92,128],"top":[93],"Hadoop":[95],"handle":[97],"datasets":[100],"cloud.":[103,151],"experimental":[105],"results":[106],"show":[107],"that":[108],"can":[110],"process":[111],"nearly":[113],"one":[114],"billion":[115],"edge":[116],"cloud":[121],"cluster":[122,131],"as":[123,125],"well":[124],"smaller":[126],"local":[130],"with":[132,143],"short":[134],"turnaround":[135],"time.":[136],"Moreover,":[137],"our":[138],"scale":[140],"almost":[141],"linearly":[142],"increasing":[144],"numbers":[145],"virtual":[147],"instances":[148],"freely":[154],"available":[155],"download":[157],"at":[158],"https://github.com/BioHPC/SORA/.":[159]},"counts_by_year":[{"year":2023,"cited_by_count":1},{"year":2021,"cited_by_count":1},{"year":2020,"cited_by_count":1},{"year":2019,"cited_by_count":1}],"updated_date":"2026-03-20T23:20:44.827607","created_date":"2025-10-10T00:00:00"}
