{"id":"https://openalex.org/W2890925057","doi":"https://doi.org/10.1145/3236367.3236385","title":"MPI Stages","display_name":"MPI Stages","publication_year":2018,"publication_date":"2018-09-19","ids":{"openalex":"https://openalex.org/W2890925057","doi":"https://doi.org/10.1145/3236367.3236385","mag":"2890925057"},"language":"en","primary_location":{"id":"doi:10.1145/3236367.3236385","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3236367.3236385","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 25th European MPI Users' Group Meeting","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5077782526","display_name":"Nawrin Sultana","orcid":"https://orcid.org/0000-0003-1597-3369"},"institutions":[{"id":"https://openalex.org/I82497590","display_name":"Auburn University","ror":"https://ror.org/02v80fc35","country_code":"US","type":"education","lineage":["https://openalex.org/I82497590"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Nawrin Sultana","raw_affiliation_strings":["Auburn University, Auburn, AL"],"affiliations":[{"raw_affiliation_string":"Auburn University, Auburn, AL","institution_ids":["https://openalex.org/I82497590"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5026440046","display_name":"Anthony Skjellum","orcid":"https://orcid.org/0000-0001-5252-6600"},"institutions":[{"id":"https://openalex.org/I177097968","display_name":"University of Tennessee at Chattanooga","ror":"https://ror.org/00nqb1v70","country_code":"US","type":"education","lineage":["https://openalex.org/I177097968"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Anthony Skjellum","raw_affiliation_strings":["University of Tennessee at Chattanooga, Chattanooga, TN"],"affiliations":[{"raw_affiliation_string":"University of Tennessee at Chattanooga, Chattanooga, TN","institution_ids":["https://openalex.org/I177097968"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5033868370","display_name":"Ignacio Laguna","orcid":"https://orcid.org/0000-0002-9374-4433"},"institutions":[{"id":"https://openalex.org/I1282311441","display_name":"Lawrence Livermore National Laboratory","ror":"https://ror.org/041nk4h53","country_code":"US","type":"facility","lineage":["https://openalex.org/I1282311441","https://openalex.org/I1330989302","https://openalex.org/I198811213","https://openalex.org/I4210138311"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Ignacio Laguna","raw_affiliation_strings":["Lawrence Livermore National Laboratory, Livermore, CA"],"affiliations":[{"raw_affiliation_string":"Lawrence Livermore National Laboratory, Livermore, CA","institution_ids":["https://openalex.org/I1282311441"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5051517667","display_name":"Matthew Shane Farmer","orcid":null},"institutions":[{"id":"https://openalex.org/I82497590","display_name":"Auburn University","ror":"https://ror.org/02v80fc35","country_code":"US","type":"education","lineage":["https://openalex.org/I82497590"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Matthew Shane Farmer","raw_affiliation_strings":["Auburn University, Auburn, AL"],"affiliations":[{"raw_affiliation_string":"Auburn University, Auburn, AL","institution_ids":["https://openalex.org/I82497590"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5080122988","display_name":"Kathryn Mohror","orcid":"https://orcid.org/0000-0002-1366-1655"},"institutions":[{"id":"https://openalex.org/I1282311441","display_name":"Lawrence Livermore National Laboratory","ror":"https://ror.org/041nk4h53","country_code":"US","type":"facility","lineage":["https://openalex.org/I1282311441","https://openalex.org/I1330989302","https://openalex.org/I198811213","https://openalex.org/I4210138311"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Kathryn Mohror","raw_affiliation_strings":["Lawrence Livermore National Laboratory, Livermore, CA"],"affiliations":[{"raw_affiliation_string":"Lawrence Livermore National Laboratory, Livermore, CA","institution_ids":["https://openalex.org/I1282311441"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5014448094","display_name":"Murali Emani","orcid":"https://orcid.org/0000-0002-6279-0007"},"institutions":[{"id":"https://openalex.org/I1282311441","display_name":"Lawrence Livermore National Laboratory","ror":"https://ror.org/041nk4h53","country_code":"US","type":"facility","lineage":["https://openalex.org/I1282311441","https://openalex.org/I1330989302","https://openalex.org/I198811213","https://openalex.org/I4210138311"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Murali Emani","raw_affiliation_strings":["Lawrence Livermore National Laboratory, Livermore, CA"],"affiliations":[{"raw_affiliation_string":"Lawrence Livermore National Laboratory, Livermore, CA","institution_ids":["https://openalex.org/I1282311441"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5077782526"],"corresponding_institution_ids":["https://openalex.org/I82497590"],"apc_list":null,"apc_paid":null,"fwci":1.1072,"has_fulltext":false,"cited_by_count":10,"citation_normalized_percentile":{"value":0.80583268,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":98},"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"11"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10772","display_name":"Distributed systems and fault tolerance","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10772","display_name":"Distributed systems and fault tolerance","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.9979000091552734,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12127","display_name":"Software System Performance and Reliability","score":0.9958999752998352,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8543587923049927},{"id":"https://openalex.org/keywords/overhead","display_name":"Overhead (engineering)","score":0.6852840185165405},{"id":"https://openalex.org/keywords/state","display_name":"State (computer science)","score":0.5738176107406616},{"id":"https://openalex.org/keywords/process","display_name":"Process (computing)","score":0.5678420662879944},{"id":"https://openalex.org/keywords/message-passing","display_name":"Message passing","score":0.474382221698761},{"id":"https://openalex.org/keywords/distributed-computing","display_name":"Distributed computing","score":0.396908164024353},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.37551188468933105},{"id":"https://openalex.org/keywords/operating-system","display_name":"Operating system","score":0.334097683429718},{"id":"https://openalex.org/keywords/programming-language","display_name":"Programming language","score":0.08510488271713257}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8543587923049927},{"id":"https://openalex.org/C2779960059","wikidata":"https://www.wikidata.org/wiki/Q7113681","display_name":"Overhead (engineering)","level":2,"score":0.6852840185165405},{"id":"https://openalex.org/C48103436","wikidata":"https://www.wikidata.org/wiki/Q599031","display_name":"State (computer science)","level":2,"score":0.5738176107406616},{"id":"https://openalex.org/C98045186","wikidata":"https://www.wikidata.org/wiki/Q205663","display_name":"Process (computing)","level":2,"score":0.5678420662879944},{"id":"https://openalex.org/C854659","wikidata":"https://www.wikidata.org/wiki/Q1859284","display_name":"Message passing","level":2,"score":0.474382221698761},{"id":"https://openalex.org/C120314980","wikidata":"https://www.wikidata.org/wiki/Q180634","display_name":"Distributed computing","level":1,"score":0.396908164024353},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.37551188468933105},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.334097683429718},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.08510488271713257}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3236367.3236385","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3236367.3236385","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 25th European MPI Users' Group Meeting","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"display_name":"Decent work and economic growth","score":0.5199999809265137,"id":"https://metadata.un.org/sdg/8"}],"awards":[],"funders":[{"id":"https://openalex.org/F4320306076","display_name":"National Science Foundation","ror":"https://ror.org/021nxhr62"},{"id":"https://openalex.org/F4320306084","display_name":"U.S. Department of Energy","ror":"https://ror.org/01bj3aw27"},{"id":"https://openalex.org/F4320317073","display_name":"University of Tennessee at Chattanooga","ror":"https://ror.org/00nqb1v70"},{"id":"https://openalex.org/F4320338286","display_name":"Lawrence Livermore National Laboratory","ror":"https://ror.org/041nk4h53"},{"id":"https://openalex.org/F4320338292","display_name":"Lawrence Berkeley National Laboratory","ror":"https://ror.org/02jbv0t02"}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":21,"referenced_works":["https://openalex.org/W1576341769","https://openalex.org/W1695243693","https://openalex.org/W1929619420","https://openalex.org/W1981432246","https://openalex.org/W1984564341","https://openalex.org/W2001495258","https://openalex.org/W2017060126","https://openalex.org/W2021234574","https://openalex.org/W2036641664","https://openalex.org/W2037208432","https://openalex.org/W2045879521","https://openalex.org/W2084293824","https://openalex.org/W2102576651","https://openalex.org/W2105039796","https://openalex.org/W2116115793","https://openalex.org/W2128577831","https://openalex.org/W2130604611","https://openalex.org/W2340533753","https://openalex.org/W2521708680","https://openalex.org/W2990714382","https://openalex.org/W7045712777"],"related_works":["https://openalex.org/W2978729728","https://openalex.org/W4288966080","https://openalex.org/W2418291489","https://openalex.org/W1530347314","https://openalex.org/W2510374584","https://openalex.org/W2134852660","https://openalex.org/W2098482419","https://openalex.org/W4254638342","https://openalex.org/W2054468904","https://openalex.org/W1494564945"],"abstract_inverted_index":{"When":[0],"an":[1],"MPI":[2,79,83,97,115],"program":[3],"experiences":[4],"a":[5,18,86,122,140],"failure,":[6,95],"the":[7,24,35,39,49,52,74,113,126,134,137],"most":[8],"common":[9],"recovery":[10],"approach":[11,154],"is":[12,32],"to":[13,22,133],"restart":[14],"all":[15],"processes":[16,44,118],"from":[17,48,104],"previous":[19],"checkpoint":[20,88],"and":[21,69,98,109,146,162],"re-queue":[23],"entire":[25],"job.":[26,116],"A":[27],"disadvantage":[28],"of":[29,51,76,130,136,142],"this":[30],"method":[31],"that,":[33],"although":[34],"failure":[36,151],"occurred":[37],"within":[38,125],"main":[40,127],"application":[41,92,99],"loop,":[42],"live":[43,63],"must":[45],"start":[46],"again":[47],"beginning":[50,135],"program,":[53,138],"along":[54],"with":[55,91,157],"new":[56],"replacement":[57,141],"processes---this":[58],"incurs":[59],"unnecessary":[60],"overhead":[61],"for":[62],"processes.":[64],"To":[65],"avoid":[66],"such":[67],"overheads":[68],"concomitant":[70],"delays,":[71],"we":[72],"introduce":[73],"concept":[75],"\"MPI":[77],"Stages.\"":[78],"Stages":[80],"saves":[81],"internal":[82],"state":[84,100],"in":[85,89],"separate":[87],"conjunction":[90],"state.":[93],"Upon":[94],"both":[96],"are":[101],"recovered,":[102],"respectively,":[103],"their":[105],"last":[106],"synchronous":[107,160],"checkpoints":[108],"continue":[110],"without":[111],"restarting":[112],"overall":[114],"Live":[117],"roll":[119],"back":[120,132],"only":[121],"few":[123],"iterations":[124],"loop":[128],"instead":[129],"rolling":[131],"while":[139],"failed":[143],"process":[144],"restarts":[145],"reintegrates,":[147],"thereby":[148],"achieving":[149],"faster":[150],"recovery.":[152],"This":[153],"integrates":[155],"well":[156],"large-scale,":[158],"bulk":[159],"applications":[161],"checkpoint/restart.":[163]},"counts_by_year":[{"year":2024,"cited_by_count":1},{"year":2023,"cited_by_count":2},{"year":2022,"cited_by_count":1},{"year":2020,"cited_by_count":2},{"year":2019,"cited_by_count":4}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2018-09-27T00:00:00"}
