{"id":"https://openalex.org/W2745862296","doi":"https://doi.org/10.1145/3107411.3107438","title":"SparkGA","display_name":"SparkGA","publication_year":2017,"publication_date":"2017-08-20","ids":{"openalex":"https://openalex.org/W2745862296","doi":"https://doi.org/10.1145/3107411.3107438","mag":"2745862296"},"language":"en","primary_location":{"id":"doi:10.1145/3107411.3107438","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3107411.3107438","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 8th ACM International Conference on Bioinformatics, Computational Biology,and Health Informatics","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5002503405","display_name":"Hamid Mushtaq","orcid":"https://orcid.org/0000-0003-4129-9056"},"institutions":[{"id":"https://openalex.org/I98358874","display_name":"Delft University of Technology","ror":"https://ror.org/02e2c7k09","country_code":"NL","type":"education","lineage":["https://openalex.org/I98358874"]}],"countries":["NL"],"is_corresponding":true,"raw_author_name":"Hamid Mushtaq","raw_affiliation_strings":["TU Delft, Delft, Netherlands"],"affiliations":[{"raw_affiliation_string":"TU Delft, Delft, Netherlands","institution_ids":["https://openalex.org/I98358874"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5006587869","display_name":"Frank Liu","orcid":"https://orcid.org/0000-0001-6615-0739"},"institutions":[{"id":"https://openalex.org/I4210156936","display_name":"IBM Research - Austin","ror":"https://ror.org/05gjbbg60","country_code":"US","type":"facility","lineage":["https://openalex.org/I1341412227","https://openalex.org/I4210114115","https://openalex.org/I4210156936"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Frank Liu","raw_affiliation_strings":["IBM, Austin, TX, USA"],"affiliations":[{"raw_affiliation_string":"IBM, Austin, TX, USA","institution_ids":["https://openalex.org/I4210156936"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100670597","display_name":"Carlos Costa","orcid":"https://orcid.org/0000-0003-0011-6030"},"institutions":[{"id":"https://openalex.org/I1341412227","display_name":"IBM (United States)","ror":"https://ror.org/05hh8d621","country_code":"US","type":"company","lineage":["https://openalex.org/I1341412227"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Carlos Costa","raw_affiliation_strings":["IBM, Yorktown, NY, USA"],"affiliations":[{"raw_affiliation_string":"IBM, Yorktown, NY, USA","institution_ids":["https://openalex.org/I1341412227"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5109282686","display_name":"Gang Liu","orcid":"https://orcid.org/0000-0002-7032-8429"},"institutions":[{"id":"https://openalex.org/I4210156936","display_name":"IBM Research - Austin","ror":"https://ror.org/05gjbbg60","country_code":"US","type":"facility","lineage":["https://openalex.org/I1341412227","https://openalex.org/I4210114115","https://openalex.org/I4210156936"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Gang Liu","raw_affiliation_strings":["IBM, Austin, TX, USA"],"affiliations":[{"raw_affiliation_string":"IBM, Austin, TX, USA","institution_ids":["https://openalex.org/I4210156936"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5062909392","display_name":"H. Peter Hofstee","orcid":"https://orcid.org/0000-0001-9649-7338"},"institutions":[{"id":"https://openalex.org/I4210156936","display_name":"IBM Research - Austin","ror":"https://ror.org/05gjbbg60","country_code":"US","type":"facility","lineage":["https://openalex.org/I1341412227","https://openalex.org/I4210114115","https://openalex.org/I4210156936"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Peter Hofstee","raw_affiliation_strings":["IBM, Austin, TX, USA"],"affiliations":[{"raw_affiliation_string":"IBM, Austin, TX, USA","institution_ids":["https://openalex.org/I4210156936"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5021955713","display_name":"Zaid Al-Ars","orcid":"https://orcid.org/0000-0001-7670-8572"},"institutions":[{"id":"https://openalex.org/I98358874","display_name":"Delft University of Technology","ror":"https://ror.org/02e2c7k09","country_code":"NL","type":"education","lineage":["https://openalex.org/I98358874"]}],"countries":["NL"],"is_corresponding":false,"raw_author_name":"Zaid Al-Ars","raw_affiliation_strings":["TU Delft, Delft, Netherlands"],"affiliations":[{"raw_affiliation_string":"TU Delft, Delft, Netherlands","institution_ids":["https://openalex.org/I98358874"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5002503405"],"corresponding_institution_ids":["https://openalex.org/I98358874"],"apc_list":null,"apc_paid":null,"fwci":4.3605,"has_fulltext":false,"cited_by_count":28,"citation_normalized_percentile":{"value":0.95480889,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":91,"max":99},"biblio":{"volume":null,"issue":null,"first_page":"148","last_page":"157"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11269","display_name":"Algorithms and Data Compression","score":0.9983000159263611,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11269","display_name":"Algorithms and Data Compression","score":0.9983000159263611,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10015","display_name":"Genomics and Phylogenetic Studies","score":0.9950000047683716,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},{"id":"https://openalex.org/T11478","display_name":"Caching and Content Delivery","score":0.9908000230789185,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8733117580413818},{"id":"https://openalex.org/keywords/scalability","display_name":"Scalability","score":0.7354945540428162},{"id":"https://openalex.org/keywords/pipeline","display_name":"Pipeline (software)","score":0.7319400310516357},{"id":"https://openalex.org/keywords/spark","display_name":"SPARK (programming language)","score":0.6521010398864746},{"id":"https://openalex.org/keywords/parallelizable-manifold","display_name":"Parallelizable manifold","score":0.6341017484664917},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.5149462819099426},{"id":"https://openalex.org/keywords/pipeline-transport","display_name":"Pipeline transport","score":0.5110505819320679},{"id":"https://openalex.org/keywords/big-data","display_name":"Big data","score":0.5105975270271301},{"id":"https://openalex.org/keywords/software","display_name":"Software","score":0.4942498505115509},{"id":"https://openalex.org/keywords/computation","display_name":"Computation","score":0.453318327665329},{"id":"https://openalex.org/keywords/node","display_name":"Node (physics)","score":0.44397565722465515},{"id":"https://openalex.org/keywords/source-code","display_name":"Source code","score":0.42742449045181274},{"id":"https://openalex.org/keywords/code","display_name":"Code (set theory)","score":0.4111478328704834},{"id":"https://openalex.org/keywords/operating-system","display_name":"Operating system","score":0.28706368803977966},{"id":"https://openalex.org/keywords/programming-language","display_name":"Programming language","score":0.2031661570072174},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.14661359786987305}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8733117580413818},{"id":"https://openalex.org/C48044578","wikidata":"https://www.wikidata.org/wiki/Q727490","display_name":"Scalability","level":2,"score":0.7354945540428162},{"id":"https://openalex.org/C43521106","wikidata":"https://www.wikidata.org/wiki/Q2165493","display_name":"Pipeline (software)","level":2,"score":0.7319400310516357},{"id":"https://openalex.org/C2781215313","wikidata":"https://www.wikidata.org/wiki/Q3493345","display_name":"SPARK (programming language)","level":2,"score":0.6521010398864746},{"id":"https://openalex.org/C148047603","wikidata":"https://www.wikidata.org/wiki/Q1014612","display_name":"Parallelizable manifold","level":2,"score":0.6341017484664917},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.5149462819099426},{"id":"https://openalex.org/C175309249","wikidata":"https://www.wikidata.org/wiki/Q725864","display_name":"Pipeline transport","level":2,"score":0.5110505819320679},{"id":"https://openalex.org/C75684735","wikidata":"https://www.wikidata.org/wiki/Q858810","display_name":"Big data","level":2,"score":0.5105975270271301},{"id":"https://openalex.org/C2777904410","wikidata":"https://www.wikidata.org/wiki/Q7397","display_name":"Software","level":2,"score":0.4942498505115509},{"id":"https://openalex.org/C45374587","wikidata":"https://www.wikidata.org/wiki/Q12525525","display_name":"Computation","level":2,"score":0.453318327665329},{"id":"https://openalex.org/C62611344","wikidata":"https://www.wikidata.org/wiki/Q1062658","display_name":"Node (physics)","level":2,"score":0.44397565722465515},{"id":"https://openalex.org/C43126263","wikidata":"https://www.wikidata.org/wiki/Q128751","display_name":"Source code","level":2,"score":0.42742449045181274},{"id":"https://openalex.org/C2776760102","wikidata":"https://www.wikidata.org/wiki/Q5139990","display_name":"Code (set theory)","level":3,"score":0.4111478328704834},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.28706368803977966},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.2031661570072174},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.14661359786987305},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.0},{"id":"https://openalex.org/C87717796","wikidata":"https://www.wikidata.org/wiki/Q146326","display_name":"Environmental engineering","level":1,"score":0.0},{"id":"https://openalex.org/C66938386","wikidata":"https://www.wikidata.org/wiki/Q633538","display_name":"Structural engineering","level":1,"score":0.0},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3107411.3107438","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3107411.3107438","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 8th ACM International Conference on Bioinformatics, Computational Biology,and Health Informatics","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":15,"referenced_works":["https://openalex.org/W1919257374","https://openalex.org/W1995448823","https://openalex.org/W2095680943","https://openalex.org/W2131187246","https://openalex.org/W2159670728","https://openalex.org/W2166588423","https://openalex.org/W2170551349","https://openalex.org/W2173213060","https://openalex.org/W2189465200","https://openalex.org/W2202435095","https://openalex.org/W2205403270","https://openalex.org/W2398924165","https://openalex.org/W2407107767","https://openalex.org/W6683651552","https://openalex.org/W6687322159"],"related_works":["https://openalex.org/W4380433113","https://openalex.org/W4386072068","https://openalex.org/W252339960","https://openalex.org/W2390529043","https://openalex.org/W2378320433","https://openalex.org/W2358343511","https://openalex.org/W2799508461","https://openalex.org/W2051877971","https://openalex.org/W1970117064","https://openalex.org/W1787170397"],"abstract_inverted_index":{"In":[0,83,124],"recent":[1],"years,":[2],"the":[3,34,58,64,75,98,128,131,151,190],"cost":[4],"of":[5,26,36,38,63,74,91,111,167,189],"NGS":[6,30],"(Next":[7],"Generation":[8],"Sequencing)":[9],"technology":[10],"has":[11],"dramatically":[12],"reduced,":[13],"making":[14],"it":[15],"a":[16,88,92,161],"viable":[17],"method":[18],"for":[19,69],"diagnosing":[20],"genetic":[21],"diseases.":[22],"The":[23,52,186],"large":[24],"amount":[25],"data":[27,100],"generated":[28],"by":[29,114],"technology,":[31],"usually":[32],"in":[33,193],"order":[35,125],"hundreds":[37],"gigabytes":[39],"per":[40],"experiment,":[41],"have":[42],"to":[43,47,126,156,169],"be":[44,154],"analyzed":[45],"quickly":[46],"generate":[48],"meaningful":[49],"variant":[50],"results.":[51],"GATK":[53,76],"best":[54],"practices":[55],"pipeline":[56,77,95],"from":[57],"Broad":[59],"Institute":[60],"is":[61,106,173,196],"one":[62],"most":[65],"popular":[66],"computational":[67],"pipelines":[68],"DNA":[70,93],"analysis.":[71],"Many":[72],"components":[73],"are":[78],"not":[79],"very":[80],"parallelizable":[81],"though.":[82],"this":[84,194],"paper,":[85],"we":[86,148],"present":[87],"parallel":[89],"implementation":[90,105],"analysis":[94,129],"based":[96],"on":[97,135,160],"big":[99],"Apache":[101],"Spark":[102],"framework.":[103],"This":[104],"highly":[107],"scalable":[108],"and":[109],"capable":[110],"parallelizing":[112],"computation":[113],"utilizing":[115],"data-level":[116],"parallelism":[117],"as":[118,120,138,141],"well":[119],"load":[121],"balancing":[122],"techniques.":[123],"reduce":[127],"cost,":[130],"framework":[132],"can":[133,153],"run":[134],"nodes":[136],"with":[137,164],"little":[139],"memory":[140],"16GB.":[142],"For":[143],"whole":[144],"genome":[145],"sequencing":[146],"experiments,":[147],"show":[149],"that":[150],"runtime":[152],"reduced":[155],"about":[157,174],"1.5":[158],"hours":[159],"20-node":[162],"cluster":[163],"an":[165],"accuracy":[166],"up":[168],"99.9981%.":[170],"Our":[171],"solution":[172],"71%":[175],"faster":[176],"than":[177],"other":[178],"state-of-the-art":[179],"solutions":[180],"while":[181],"also":[182],"being":[183],"more":[184],"accurate.":[185],"source":[187],"code":[188],"software":[191],"described":[192],"paper":[195],"publicly":[197],"available":[198],"at":[199],"https://github.com/HamidMushtaq/SparkGA1.git.":[200]},"counts_by_year":[{"year":2025,"cited_by_count":1},{"year":2022,"cited_by_count":4},{"year":2021,"cited_by_count":2},{"year":2020,"cited_by_count":6},{"year":2019,"cited_by_count":8},{"year":2018,"cited_by_count":4},{"year":2017,"cited_by_count":3}],"updated_date":"2026-04-04T16:13:02.066488","created_date":"2017-08-31T00:00:00"}
