{"id":"https://openalex.org/W2990586881","doi":"https://doi.org/10.1109/hpec.2019.8916436","title":"Using Container Migration for HPC Workloads Resilience","display_name":"Using Container Migration for HPC Workloads Resilience","publication_year":2019,"publication_date":"2019-09-01","ids":{"openalex":"https://openalex.org/W2990586881","doi":"https://doi.org/10.1109/hpec.2019.8916436","mag":"2990586881"},"language":"en","primary_location":{"id":"doi:10.1109/hpec.2019.8916436","is_oa":false,"landing_page_url":"https://doi.org/10.1109/hpec.2019.8916436","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2019 IEEE High Performance Extreme Computing Conference (HPEC)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5111707760","display_name":"Mohamad Sindi","orcid":null},"institutions":[{"id":"https://openalex.org/I63966007","display_name":"Massachusetts Institute of Technology","ror":"https://ror.org/042nb2s44","country_code":"US","type":"education","lineage":["https://openalex.org/I63966007"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Mohamad Sindi","raw_affiliation_strings":["Center for Computational Engineering, Massachusetts Institute of Technology, Cambridge, MA, USA"],"affiliations":[{"raw_affiliation_string":"Center for Computational Engineering, Massachusetts Institute of Technology, Cambridge, MA, USA","institution_ids":["https://openalex.org/I63966007"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5101672405","display_name":"John Williams","orcid":"https://orcid.org/0000-0002-6118-0434"},"institutions":[{"id":"https://openalex.org/I63966007","display_name":"Massachusetts Institute of Technology","ror":"https://ror.org/042nb2s44","country_code":"US","type":"education","lineage":["https://openalex.org/I63966007"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"John R. Williams","raw_affiliation_strings":["Department of Civil and Environmental Engineering, Massachusetts Institute of Technology, Cambridge, MA, USA"],"affiliations":[{"raw_affiliation_string":"Department of Civil and Environmental Engineering, Massachusetts Institute of Technology, Cambridge, MA, USA","institution_ids":["https://openalex.org/I63966007"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5111707760"],"corresponding_institution_ids":["https://openalex.org/I63966007"],"apc_list":null,"apc_paid":null,"fwci":2.4631,"has_fulltext":false,"cited_by_count":14,"citation_normalized_percentile":{"value":0.91679547,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":89,"max":99},"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"10"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10101","display_name":"Cloud Computing and Resource Management","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10101","display_name":"Cloud Computing and Resource Management","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10715","display_name":"Distributed and Parallel Computing Systems","score":0.9991999864578247,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11181","display_name":"Advanced Data Storage Technologies","score":0.9991999864578247,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8496044874191284},{"id":"https://openalex.org/keywords/benchmark","display_name":"Benchmark (surveying)","score":0.7114399075508118},{"id":"https://openalex.org/keywords/supercomputer","display_name":"Supercomputer","score":0.6845980882644653},{"id":"https://openalex.org/keywords/container","display_name":"Container (type theory)","score":0.6747377514839172},{"id":"https://openalex.org/keywords/workload","display_name":"Workload","score":0.6628710627555847},{"id":"https://openalex.org/keywords/overhead","display_name":"Overhead (engineering)","score":0.619285523891449},{"id":"https://openalex.org/keywords/implementation","display_name":"Implementation","score":0.5574359893798828},{"id":"https://openalex.org/keywords/operating-system","display_name":"Operating system","score":0.4935683012008667},{"id":"https://openalex.org/keywords/resilience","display_name":"Resilience (materials science)","score":0.444105863571167},{"id":"https://openalex.org/keywords/node","display_name":"Node (physics)","score":0.44397449493408203},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.4360790550708771},{"id":"https://openalex.org/keywords/distributed-computing","display_name":"Distributed computing","score":0.42339959740638733},{"id":"https://openalex.org/keywords/cluster","display_name":"Cluster (spacecraft)","score":0.42118164896965027},{"id":"https://openalex.org/keywords/computer-architecture","display_name":"Computer architecture","score":0.3257165849208832},{"id":"https://openalex.org/keywords/software-engineering","display_name":"Software engineering","score":0.07789400219917297}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8496044874191284},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.7114399075508118},{"id":"https://openalex.org/C83283714","wikidata":"https://www.wikidata.org/wiki/Q121117","display_name":"Supercomputer","level":2,"score":0.6845980882644653},{"id":"https://openalex.org/C2781018962","wikidata":"https://www.wikidata.org/wiki/Q5164884","display_name":"Container (type theory)","level":2,"score":0.6747377514839172},{"id":"https://openalex.org/C2778476105","wikidata":"https://www.wikidata.org/wiki/Q628539","display_name":"Workload","level":2,"score":0.6628710627555847},{"id":"https://openalex.org/C2779960059","wikidata":"https://www.wikidata.org/wiki/Q7113681","display_name":"Overhead (engineering)","level":2,"score":0.619285523891449},{"id":"https://openalex.org/C26713055","wikidata":"https://www.wikidata.org/wiki/Q245962","display_name":"Implementation","level":2,"score":0.5574359893798828},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.4935683012008667},{"id":"https://openalex.org/C2779585090","wikidata":"https://www.wikidata.org/wiki/Q3457762","display_name":"Resilience (materials science)","level":2,"score":0.444105863571167},{"id":"https://openalex.org/C62611344","wikidata":"https://www.wikidata.org/wiki/Q1062658","display_name":"Node (physics)","level":2,"score":0.44397449493408203},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.4360790550708771},{"id":"https://openalex.org/C120314980","wikidata":"https://www.wikidata.org/wiki/Q180634","display_name":"Distributed computing","level":1,"score":0.42339959740638733},{"id":"https://openalex.org/C164866538","wikidata":"https://www.wikidata.org/wiki/Q367351","display_name":"Cluster (spacecraft)","level":2,"score":0.42118164896965027},{"id":"https://openalex.org/C118524514","wikidata":"https://www.wikidata.org/wiki/Q173212","display_name":"Computer architecture","level":1,"score":0.3257165849208832},{"id":"https://openalex.org/C115903868","wikidata":"https://www.wikidata.org/wiki/Q80993","display_name":"Software engineering","level":1,"score":0.07789400219917297},{"id":"https://openalex.org/C78519656","wikidata":"https://www.wikidata.org/wiki/Q101333","display_name":"Mechanical engineering","level":1,"score":0.0},{"id":"https://openalex.org/C97355855","wikidata":"https://www.wikidata.org/wiki/Q11473","display_name":"Thermodynamics","level":1,"score":0.0},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.0},{"id":"https://openalex.org/C13280743","wikidata":"https://www.wikidata.org/wiki/Q131089","display_name":"Geodesy","level":1,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C66938386","wikidata":"https://www.wikidata.org/wiki/Q633538","display_name":"Structural engineering","level":1,"score":0.0},{"id":"https://openalex.org/C205649164","wikidata":"https://www.wikidata.org/wiki/Q1071","display_name":"Geography","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/hpec.2019.8916436","is_oa":false,"landing_page_url":"https://doi.org/10.1109/hpec.2019.8916436","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2019 IEEE High Performance Extreme Computing Conference (HPEC)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/10","display_name":"Reduced inequalities","score":0.7699999809265137}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":32,"referenced_works":["https://openalex.org/W307918303","https://openalex.org/W1498712972","https://openalex.org/W1569892934","https://openalex.org/W1701369565","https://openalex.org/W1825216778","https://openalex.org/W1984712701","https://openalex.org/W1995644038","https://openalex.org/W2012752667","https://openalex.org/W2036641664","https://openalex.org/W2065547432","https://openalex.org/W2072341511","https://openalex.org/W2075174112","https://openalex.org/W2079839497","https://openalex.org/W2096611465","https://openalex.org/W2139244298","https://openalex.org/W2140953464","https://openalex.org/W2142892618","https://openalex.org/W2167267243","https://openalex.org/W2169631286","https://openalex.org/W2264524816","https://openalex.org/W2319800376","https://openalex.org/W2401070491","https://openalex.org/W2471032162","https://openalex.org/W2475755343","https://openalex.org/W2522551977","https://openalex.org/W2527842547","https://openalex.org/W2552349527","https://openalex.org/W2775134884","https://openalex.org/W2912987619","https://openalex.org/W3000664081","https://openalex.org/W3019043644","https://openalex.org/W6631615826"],"related_works":["https://openalex.org/W2384867379","https://openalex.org/W4400094300","https://openalex.org/W2329539859","https://openalex.org/W2227905990","https://openalex.org/W2765823764","https://openalex.org/W3214280620","https://openalex.org/W3191490922","https://openalex.org/W2794038527","https://openalex.org/W2327638088","https://openalex.org/W2151092287"],"abstract_inverted_index":{"We":[0,83,115,136],"share":[1],"experiences":[2],"in":[3],"implementing":[4],"a":[5],"containerbased":[6],"HPC":[7,14,66,100,168],"environment":[8],"that":[9,127],"could":[10],"help":[11],"sustain":[12],"running":[13,19],"workloads":[15,20,40,101,169],"on":[16,88,130],"clusters.":[17],"By":[18],"inside":[21,102],"containers,":[22],"we":[23,154],"are":[24,41,68,71],"able":[25],"to":[26,35,61,93,134,161],"migrate":[27],"them":[28],"from":[29],"cluster":[30],"nodes":[31,37],"anticipating":[32],"hardware":[33,75],"problems,":[34],"healthy":[36],"while":[38,107],"the":[39,47,62,86,109,112,121,140,149,159],"running.":[42],"Migration":[43],"is":[44,59,132,158],"done":[45,72],"using":[46,170],"CRIU":[48,171],"tool":[49],"with":[50,73,104],"no":[51],"application":[52,128],"modification.":[53],"No":[54],"major":[55],"interruption":[56],"or":[57],"overhead":[58],"introduced":[60],"workload.":[63],"Various":[64],"real":[65,166],"applications":[67,87],"tested.":[69],"Tests":[70],"different":[74],"node":[76],"specs,":[77],"network":[78],"interconnects,":[79],"and":[80,90,145,172],"MPI":[81],"implementations.":[82],"also":[84,125],"benchmark":[85],"containers":[89,103,131],"compare":[91],"performance":[92,129],"native.":[94,135],"Results":[95],"demonstrate":[96,162],"successful":[97,163],"migration":[98,122,164],"of":[99,111,139,151,165],"minimal":[105],"interruption,":[106],"maintaining":[108],"integrity":[110],"results":[113],"produced.":[114],"provide":[116],"several":[117],"YouTube":[118],"videos":[119],"demonstrating":[120],"tests.":[123],"Benchmarks":[124],"show":[126],"close":[133],"discuss":[137],"some":[138],"challenges":[141],"faced":[142],"during":[143],"implementation":[144],"solutions":[146],"adopted.":[147],"To":[148],"best":[150],"our":[152],"knowledge,":[153],"believe":[155],"this":[156],"work":[157],"first":[160],"MPI-based":[167],"containers.":[173]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":2},{"year":2024,"cited_by_count":1},{"year":2023,"cited_by_count":3},{"year":2022,"cited_by_count":3},{"year":2021,"cited_by_count":3},{"year":2020,"cited_by_count":1}],"updated_date":"2026-03-25T13:04:00.132906","created_date":"2025-10-10T00:00:00"}
