{"id":"https://openalex.org/W6903454680","doi":"https://doi.org/10.11588/heidok.00027013","title":"Modern Systems for Large-scale Genomics Data Analysis in the Cloud","display_name":"Modern Systems for Large-scale Genomics Data Analysis in the Cloud","publication_year":2019,"publication_date":"2019-01-01","ids":{"openalex":"https://openalex.org/W6903454680","doi":"https://doi.org/10.11588/heidok.00027013"},"language":"en","primary_location":{"id":"pmh:oai:archiv.ub.uni-heidelberg.de:27013","is_oa":true,"landing_page_url":null,"pdf_url":"http://archiv.ub.uni-heidelberg.de/volltextserver/27013/1/sergei_thesis_submission_02_05_2019.pdf","source":{"id":"https://openalex.org/S4306402333","display_name":"heiDOK (Heidelberg University)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I223822909","host_organization_name":"Heidelberg University","host_organization_lineage":["https://openalex.org/I223822909"],"host_organization_lineage_names":[],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"Dissertation"},"type":"article","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"http://archiv.ub.uni-heidelberg.de/volltextserver/27013/1/sergei_thesis_submission_02_05_2019.pdf","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":null,"display_name":"Yakneen, Sergei","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Yakneen, Sergei","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":1,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.51753878,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":true,"primary_topic":{"id":"https://openalex.org/T11986","display_name":"Scientific Computing and Data Management","score":0.579200029373169,"subfield":{"id":"https://openalex.org/subfields/1802","display_name":"Information Systems and Management"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},"topics":[{"id":"https://openalex.org/T11986","display_name":"Scientific Computing and Data Management","score":0.579200029373169,"subfield":{"id":"https://openalex.org/subfields/1802","display_name":"Information Systems and Management"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T10015","display_name":"Genomics and Phylogenetic Studies","score":0.10159999877214432,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},{"id":"https://openalex.org/T13937","display_name":"Genetics, Bioinformatics, and Biomedical Research","score":0.06449999660253525,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/workflow","display_name":"Workflow","score":0.847599983215332},{"id":"https://openalex.org/keywords/cloud-computing","display_name":"Cloud computing","score":0.8421000242233276},{"id":"https://openalex.org/keywords/timeline","display_name":"Timeline","score":0.6060000061988831},{"id":"https://openalex.org/keywords/context","display_name":"Context (archaeology)","score":0.5927000045776367},{"id":"https://openalex.org/keywords/scalability","display_name":"Scalability","score":0.5906999707221985},{"id":"https://openalex.org/keywords/resource","display_name":"Resource (disambiguation)","score":0.4936999976634979},{"id":"https://openalex.org/keywords/workflow-management-system","display_name":"Workflow management system","score":0.4869999885559082},{"id":"https://openalex.org/keywords/data-management","display_name":"Data management","score":0.4156000018119812}],"concepts":[{"id":"https://openalex.org/C177212765","wikidata":"https://www.wikidata.org/wiki/Q627335","display_name":"Workflow","level":2,"score":0.847599983215332},{"id":"https://openalex.org/C79974875","wikidata":"https://www.wikidata.org/wiki/Q483639","display_name":"Cloud computing","level":2,"score":0.8421000242233276},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6974999904632568},{"id":"https://openalex.org/C2522767166","wikidata":"https://www.wikidata.org/wiki/Q2374463","display_name":"Data science","level":1,"score":0.6866999864578247},{"id":"https://openalex.org/C4438859","wikidata":"https://www.wikidata.org/wiki/Q186117","display_name":"Timeline","level":2,"score":0.6060000061988831},{"id":"https://openalex.org/C2779343474","wikidata":"https://www.wikidata.org/wiki/Q3109175","display_name":"Context (archaeology)","level":2,"score":0.5927000045776367},{"id":"https://openalex.org/C48044578","wikidata":"https://www.wikidata.org/wiki/Q727490","display_name":"Scalability","level":2,"score":0.5906999707221985},{"id":"https://openalex.org/C206345919","wikidata":"https://www.wikidata.org/wiki/Q20380951","display_name":"Resource (disambiguation)","level":2,"score":0.4936999976634979},{"id":"https://openalex.org/C140824633","wikidata":"https://www.wikidata.org/wiki/Q2808660","display_name":"Workflow management system","level":3,"score":0.4869999885559082},{"id":"https://openalex.org/C1668388","wikidata":"https://www.wikidata.org/wiki/Q1149776","display_name":"Data management","level":2,"score":0.4156000018119812},{"id":"https://openalex.org/C118530786","wikidata":"https://www.wikidata.org/wiki/Q1134732","display_name":"Instrumentation (computer programming)","level":2,"score":0.3880999982357025},{"id":"https://openalex.org/C115903868","wikidata":"https://www.wikidata.org/wiki/Q80993","display_name":"Software engineering","level":1,"score":0.36649999022483826},{"id":"https://openalex.org/C75684735","wikidata":"https://www.wikidata.org/wiki/Q858810","display_name":"Big data","level":2,"score":0.3596999943256378},{"id":"https://openalex.org/C2778755073","wikidata":"https://www.wikidata.org/wiki/Q10858537","display_name":"Scale (ratio)","level":2,"score":0.3549000024795532},{"id":"https://openalex.org/C201995342","wikidata":"https://www.wikidata.org/wiki/Q682496","display_name":"Systems engineering","level":1,"score":0.31869998574256897},{"id":"https://openalex.org/C188220564","wikidata":"https://www.wikidata.org/wiki/Q3325097","display_name":"Workflow engine","level":3,"score":0.3061999976634979},{"id":"https://openalex.org/C120314980","wikidata":"https://www.wikidata.org/wiki/Q180634","display_name":"Distributed computing","level":1,"score":0.2985999882221222},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.296999990940094},{"id":"https://openalex.org/C2780609101","wikidata":"https://www.wikidata.org/wiki/Q17156588","display_name":"Resource management (computing)","level":2,"score":0.29190000891685486},{"id":"https://openalex.org/C72634772","wikidata":"https://www.wikidata.org/wiki/Q386824","display_name":"Data integration","level":2,"score":0.28360000252723694},{"id":"https://openalex.org/C77088390","wikidata":"https://www.wikidata.org/wiki/Q8513","display_name":"Database","level":1,"score":0.2775999903678894},{"id":"https://openalex.org/C133462117","wikidata":"https://www.wikidata.org/wiki/Q4929239","display_name":"Data collection","level":2,"score":0.26510000228881836},{"id":"https://openalex.org/C138827492","wikidata":"https://www.wikidata.org/wiki/Q6661985","display_name":"Data processing","level":2,"score":0.26429998874664307}],"mesh":[],"locations_count":2,"locations":[{"id":"pmh:oai:archiv.ub.uni-heidelberg.de:27013","is_oa":true,"landing_page_url":null,"pdf_url":"http://archiv.ub.uni-heidelberg.de/volltextserver/27013/1/sergei_thesis_submission_02_05_2019.pdf","source":{"id":"https://openalex.org/S4306402333","display_name":"heiDOK (Heidelberg University)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I223822909","host_organization_name":"Heidelberg University","host_organization_lineage":["https://openalex.org/I223822909"],"host_organization_lineage_names":[],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"Dissertation"},{"id":"doi:10.11588/heidok.00027013","is_oa":true,"landing_page_url":"https://doi.org/10.11588/heidok.00027013","pdf_url":null,"source":{"id":"https://openalex.org/S7407051545","display_name":"University Library Heidelberg","issn_l":null,"issn":[],"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article-journal"}],"best_oa_location":{"id":"pmh:oai:archiv.ub.uni-heidelberg.de:27013","is_oa":true,"landing_page_url":null,"pdf_url":"http://archiv.ub.uni-heidelberg.de/volltextserver/27013/1/sergei_thesis_submission_02_05_2019.pdf","source":{"id":"https://openalex.org/S4306402333","display_name":"heiDOK (Heidelberg University)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I223822909","host_organization_name":"Heidelberg University","host_organization_lineage":["https://openalex.org/I223822909"],"host_organization_lineage_names":[],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"Dissertation"},"sustainable_development_goals":[{"display_name":"Industry, innovation and infrastructure","score":0.644200325012207,"id":"https://metadata.un.org/sdg/9"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W6903454680.pdf"},"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Genomics":[0,319],"researchers":[1],"increasingly":[2],"turn":[3],"to":[4,27,65,76,104,113,149,175,211,229,248,285,296,303,345,368,430,466],"cloud":[5,21,64,208],"computing":[6],"as":[7,32,359,413],"a":[8,49,106,281,459],"means":[9],"of":[10,48,71,115,120,153,166,181,190,222,273,288,309,326,352,374,449,455,472],"accomplishing":[11],"large-scale":[12,58,236],"analyses":[13,138],"efficiently":[14],"and":[15,25,35,44,90,94,111,118,126,135,164,193,243,276,300,323,366,382,399,420,457,463],"cost-effectively.":[16],"Successful":[17],"operation":[18],"in":[19,62,102,206,219,251,266,396,402,452,469],"the":[20,46,54,63,158,201,207,220,233,249,269,286,294,298,324,350,363,375,414,432,442,453,470],"requires":[22],"careful":[23],"instrumentation":[24],"management":[26,61],"avoid":[28],"common":[29],"pitfalls,":[30],"such":[31,358,412],"resource":[33,88],"bottlenecks":[34],"low":[36],"utilisation":[37],"that":[38,108,137,195,355,391,424],"can":[39],"both":[40],"drive":[41,356],"up":[42,231],"costs":[43],"extend":[45],"timeline":[47],"scientific":[50,59,132,204],"project.":[51],"&#13;\\n&#13;\\nWe":[52],"developed":[53,218,387,426],"Butler":[55,72,96],"framework":[56,107,183],"for":[57,203,209,235,240,338,435],"workflow":[60,121,133],"meet":[66],"these":[67,262],"challenges.":[68],"The":[69,178,254],"cornerstones":[70],"design":[73,180],"are:":[74],"ability":[75],"support":[77],"multiple":[78],"clouds,":[79,278],"declarative":[80],"infrastructure":[81],"configuration":[82],"management,":[83],"scalable,":[84],"fault-tolerant":[85],"operation,":[86],"comprehensive":[87],"monitoring,":[89],"automated":[91],"error":[92,124],"detection":[93,125],"recovery.":[95],"relies":[97],"on":[98,157,333,441],"industry-strength":[99],"open-source":[100],"components":[101],"order":[103],"deliver":[105],"is":[109,258,406],"robust":[110],"scalable":[112],"thousands":[114],"compute":[116,271],"cores":[117],"millions":[119],"executions.":[122],"Butler\u2019s":[123],"self-healing":[127],"capabilities":[128],"are":[129,139,331],"unique":[130],"among":[131],"frameworks":[134,423],"ensure":[136],"carried":[140],"out":[141],"with":[142,170,200,232,261,408],"minimal":[143],"human":[144],"intervention.&#13;\\n&#13;\\nButler":[145],"has":[146],"been":[147,217],"used":[148],"analyse":[150],"over":[151],"725TB":[152],"DNA":[154],"sequencing":[155,245,370],"data":[156,237,264,311,439],"cloud,":[159,443],"using":[160],"1500":[161],"CPU":[162],"cores,":[163],"6TB":[165],"RAM,":[167],"delivering":[168],"results":[169],"43\\\\%":[171],"increased":[172,306],"efficiency":[173],"compared":[174],"other":[176,188,409],"tools.":[177],"flexible":[179],"this":[182],"allows":[184,293],"easy":[185],"adoption":[186],"within":[187,388],"fields":[189],"Life":[191],"Sciences":[192],"ensures":[194],"it":[196],"will":[197],"scale":[198,437],"together":[199],"demand":[202],"analysis":[205,440],"years":[210],"come.&#13;\\n&#13;\\nBecause":[212],"many":[213],"bioinformatics":[214,290],"tools":[215,411],"have":[216],"context":[221],"small":[223],"sample":[224],"sizes":[225],"they":[226],"often":[227],"struggle":[228],"keep":[230],"demands":[234],"processing":[238,339],"required":[239,302],"modern":[241,274,289],"research":[242],"clinical":[244,369],"projects":[246,315],"due":[247],"limitations":[250],"their":[252],"design.":[253],"Rheos":[255,279,344,389,392],"software":[256,295],"system":[257],"designed":[259],"specifically":[260],"large":[263,436],"sets":[265,312],"mind.":[267],"Utilising":[268],"elastic":[270],"capacity":[272],"academic":[275],"commercial":[277],"takes":[280],"service-oriented":[282],"containerised":[283],"approach":[284,337,465],"implementation":[287],"algorithms,":[291],"which":[292,342,405],"achieve":[297],"scalability":[299],"ease-of-use":[301],"succeed":[304],"under":[305],"operational":[307],"load":[308],"massive":[310],"generated":[313],"by":[314,444],"like":[316],"International":[317],"Cancer":[318],"Consortium":[320],"(ICGC)":[321],"Argo":[322],"All":[325],"Us":[327],"initiative.":[328],"&#13;\\n&#13;\\nRheos":[329],"algorithms":[330,386],"based":[332],"an":[334],"innovative":[335],"stream-based":[336],"genomic":[340,353,438,467],"data,":[341],"enables":[343],"make":[346],"faster":[347],"decisions":[348],"about":[349],"presence":[351],"mutations":[354],"diseases":[357],"cancer,":[360],"thereby":[361],"improving":[362],"tools'":[364],"efficacy":[365],"relevance":[367],"applications.":[371],"Our":[372],"testing":[373],"novel":[376],"germline":[377],"Single":[378],"Nucleotide":[379],"Polymorphism":[380],"(SNP)":[381],"deletion":[383,403],"variant":[384],"calling":[385,398],"indicates":[390],"achieves":[393],"~98\\\\%":[394],"accuracy":[395,401],"SNP":[397],"~85\\\\%":[400],"calling,":[404],"comparable":[407],"leading":[410],"Genome":[415],"Analysis":[416],"Toolkit":[417],"(GATK),":[418],"freebayes,":[419],"Delly.&#13;\\n&#13;\\nThe":[421],"two":[422],"we":[425],"provide":[427],"important":[428],"contributions":[429],"solve":[431],"ever-growing":[433],"need":[434],"enabling":[445],"more":[446,461],"effective":[447],"use":[448],"existing":[450],"tools,":[451],"case":[454,471],"Butler,":[456],"providing":[458],"new,":[460],"dynamic":[462],"real-time":[464],"analysis,":[468],"Rheos.":[473]},"counts_by_year":[],"updated_date":"2025-11-06T06:51:31.235846","created_date":"2025-10-10T00:00:00"}
