{"id":"https://openalex.org/W2606756653","doi":"https://doi.org/10.1145/3086157.3086165","title":"Optimal Checkpointing Period with Replicated Execution on Heterogeneous Platforms","display_name":"Optimal Checkpointing Period with Replicated Execution on Heterogeneous Platforms","publication_year":2017,"publication_date":"2017-06-23","ids":{"openalex":"https://openalex.org/W2606756653","doi":"https://doi.org/10.1145/3086157.3086165","mag":"2606756653"},"language":"en","primary_location":{"id":"doi:10.1145/3086157.3086165","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3086157.3086165","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2017 Workshop on Fault-Tolerance for HPC at Extreme Scale","raw_type":"proceedings-article"},"type":"preprint","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://inria.hal.science/hal-01504936","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5037778045","display_name":"Anne Beno\u00eet","orcid":"https://orcid.org/0000-0003-2910-3540"},"institutions":[{"id":"https://openalex.org/I113428412","display_name":"\u00c9cole Normale Sup\u00e9rieure de Lyon","ror":"https://ror.org/04zmssz18","country_code":"FR","type":"education","lineage":["https://openalex.org/I113428412","https://openalex.org/I203339264"]}],"countries":["FR"],"is_corresponding":true,"raw_author_name":"Anne Benoit","raw_affiliation_strings":["LIP, ENS Lyon, Lyon, France"],"affiliations":[{"raw_affiliation_string":"LIP, ENS Lyon, Lyon, France","institution_ids":["https://openalex.org/I113428412"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5020464041","display_name":"Aur\u00e9lien Cavelan","orcid":"https://orcid.org/0000-0002-1784-0730"},"institutions":[{"id":"https://openalex.org/I113428412","display_name":"\u00c9cole Normale Sup\u00e9rieure de Lyon","ror":"https://ror.org/04zmssz18","country_code":"FR","type":"education","lineage":["https://openalex.org/I113428412","https://openalex.org/I203339264"]}],"countries":["FR"],"is_corresponding":false,"raw_author_name":"Aur\u00e9lien Cavelan","raw_affiliation_strings":["LIP, ENS Lyon, Lyon, France"],"affiliations":[{"raw_affiliation_string":"LIP, ENS Lyon, Lyon, France","institution_ids":["https://openalex.org/I113428412"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5068707481","display_name":"Valentin Le F\u00e8vre","orcid":"https://orcid.org/0000-0001-6853-5392"},"institutions":[{"id":"https://openalex.org/I113428412","display_name":"\u00c9cole Normale Sup\u00e9rieure de Lyon","ror":"https://ror.org/04zmssz18","country_code":"FR","type":"education","lineage":["https://openalex.org/I113428412","https://openalex.org/I203339264"]}],"countries":["FR"],"is_corresponding":false,"raw_author_name":"Valentin Le F\u00e8vre","raw_affiliation_strings":["LIP, ENS Lyon, Lyon, France"],"affiliations":[{"raw_affiliation_string":"LIP, ENS Lyon, Lyon, France","institution_ids":["https://openalex.org/I113428412"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5001838181","display_name":"Yves Robert","orcid":"https://orcid.org/0000-0003-2361-055X"},"institutions":[{"id":"https://openalex.org/I113428412","display_name":"\u00c9cole Normale Sup\u00e9rieure de Lyon","ror":"https://ror.org/04zmssz18","country_code":"FR","type":"education","lineage":["https://openalex.org/I113428412","https://openalex.org/I203339264"]},{"id":"https://openalex.org/I4210144566","display_name":"Laboratoire de l'Informatique du Parall\u00e9lisme","ror":"https://ror.org/04msnz457","country_code":"FR","type":"facility","lineage":["https://openalex.org/I100532134","https://openalex.org/I113428412","https://openalex.org/I1294671590","https://openalex.org/I1326498283","https://openalex.org/I203339264","https://openalex.org/I203339264","https://openalex.org/I4210144566"]},{"id":"https://openalex.org/I75027704","display_name":"University of Tennessee at Knoxville","ror":"https://ror.org/020f3ap87","country_code":"US","type":"education","lineage":["https://openalex.org/I75027704"]}],"countries":["FR","US"],"is_corresponding":false,"raw_author_name":"Yves Robert","raw_affiliation_strings":["LIP, ENS Lyon &amp; University of Tennessee, Knoxville, Lyon, France","ICL - Innovative Computing Laboratory [Knoxville] (Suite 203 Claxton\r\n1122 Volunteer Blvd\r\nKnoxville, TN 37996 - United States)"],"affiliations":[{"raw_affiliation_string":"LIP, ENS Lyon &amp; University of Tennessee, Knoxville, Lyon, France","institution_ids":["https://openalex.org/I75027704","https://openalex.org/I4210144566","https://openalex.org/I113428412"]},{"raw_affiliation_string":"ICL - Innovative Computing Laboratory [Knoxville] (Suite 203 Claxton\r\n1122 Volunteer Blvd\r\nKnoxville, TN 37996 - United States)","institution_ids":["https://openalex.org/I75027704"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5037778045"],"corresponding_institution_ids":["https://openalex.org/I113428412"],"apc_list":null,"apc_paid":null,"fwci":0.70757605,"has_fulltext":false,"cited_by_count":4,"citation_normalized_percentile":{"value":0.70760973,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":96},"biblio":{"volume":null,"issue":null,"first_page":"9","last_page":"16"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10772","display_name":"Distributed systems and fault tolerance","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10772","display_name":"Distributed systems and fault tolerance","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10933","display_name":"Real-Time Systems Scheduling","score":0.9876999855041504,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10715","display_name":"Distributed and Parallel Computing Systems","score":0.9868999719619751,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8548680543899536},{"id":"https://openalex.org/keywords/replicate","display_name":"Replicate","score":0.7225570678710938},{"id":"https://openalex.org/keywords/distributed-computing","display_name":"Distributed computing","score":0.6489824652671814},{"id":"https://openalex.org/keywords/execution-time","display_name":"Execution time","score":0.5931789875030518},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.5661443471908569},{"id":"https://openalex.org/keywords/work","display_name":"Work (physics)","score":0.4275248646736145},{"id":"https://openalex.org/keywords/operating-system","display_name":"Operating system","score":0.417305588722229}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8548680543899536},{"id":"https://openalex.org/C2781162219","wikidata":"https://www.wikidata.org/wiki/Q26250693","display_name":"Replicate","level":2,"score":0.7225570678710938},{"id":"https://openalex.org/C120314980","wikidata":"https://www.wikidata.org/wiki/Q180634","display_name":"Distributed computing","level":1,"score":0.6489824652671814},{"id":"https://openalex.org/C2989134064","wikidata":"https://www.wikidata.org/wiki/Q288510","display_name":"Execution time","level":2,"score":0.5931789875030518},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.5661443471908569},{"id":"https://openalex.org/C18762648","wikidata":"https://www.wikidata.org/wiki/Q42213","display_name":"Work (physics)","level":2,"score":0.4275248646736145},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.417305588722229},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.0},{"id":"https://openalex.org/C78519656","wikidata":"https://www.wikidata.org/wiki/Q101333","display_name":"Mechanical engineering","level":1,"score":0.0},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.0},{"id":"https://openalex.org/C105795698","wikidata":"https://www.wikidata.org/wiki/Q12483","display_name":"Statistics","level":1,"score":0.0}],"mesh":[],"locations_count":3,"locations":[{"id":"doi:10.1145/3086157.3086165","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3086157.3086165","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2017 Workshop on Fault-Tolerance for HPC at Extreme Scale","raw_type":"proceedings-article"},{"id":"pmh:oai:HAL:hal-01504936v1","is_oa":true,"landing_page_url":"https://inria.hal.science/hal-01504936","pdf_url":null,"source":{"id":"https://openalex.org/S4306402512","display_name":"HAL (Le Centre pour la Communication Scientifique Directe)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1294671590","host_organization_name":"Centre National de la Recherche Scientifique","host_organization_lineage":["https://openalex.org/I1294671590"],"host_organization_lineage_names":[],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"[Research Report] RR-9055, INRIA. 2017","raw_type":"Reports"},{"id":"pmh:oai:HAL:hal-02082847v1","is_oa":true,"landing_page_url":"https://inria.hal.science/hal-02082847","pdf_url":null,"source":{"id":"https://openalex.org/S4306402512","display_name":"HAL (Le Centre pour la Communication Scientifique Directe)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1294671590","host_organization_name":"Centre National de la Recherche Scientifique","host_organization_lineage":["https://openalex.org/I1294671590"],"host_organization_lineage_names":[],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"2017 Workshop on Fault-Tolerance for HPC at Extreme Scale FTXS, Jun 2017, Washington, United States. pp.9-16, &#x27E8;10.1145/3086157.3086165&#x27E9;","raw_type":"Conference papers"}],"best_oa_location":{"id":"pmh:oai:HAL:hal-01504936v1","is_oa":true,"landing_page_url":"https://inria.hal.science/hal-01504936","pdf_url":null,"source":{"id":"https://openalex.org/S4306402512","display_name":"HAL (Le Centre pour la Communication Scientifique Directe)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1294671590","host_organization_name":"Centre National de la Recherche Scientifique","host_organization_lineage":["https://openalex.org/I1294671590"],"host_organization_lineage_names":[],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"[Research Report] RR-9055, INRIA. 2017","raw_type":"Reports"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":22,"referenced_works":["https://openalex.org/W57243869","https://openalex.org/W63848143","https://openalex.org/W79276452","https://openalex.org/W1584278023","https://openalex.org/W1595409123","https://openalex.org/W1984564341","https://openalex.org/W1986905947","https://openalex.org/W2001219263","https://openalex.org/W2014767141","https://openalex.org/W2033656974","https://openalex.org/W2035492130","https://openalex.org/W2063924830","https://openalex.org/W2067883080","https://openalex.org/W2089536264","https://openalex.org/W2098631346","https://openalex.org/W2133046454","https://openalex.org/W2137787140","https://openalex.org/W2150819396","https://openalex.org/W2167899274","https://openalex.org/W2320681231","https://openalex.org/W2466095195","https://openalex.org/W4233783938"],"related_works":["https://openalex.org/W4254851101","https://openalex.org/W3171007296","https://openalex.org/W22115721","https://openalex.org/W2321234655","https://openalex.org/W2065444835","https://openalex.org/W4394550905","https://openalex.org/W2470062578","https://openalex.org/W2952773340","https://openalex.org/W2981861370","https://openalex.org/W4229503580"],"abstract_inverted_index":{"In":[0],"this":[1,73],"paper,":[2],"we":[3],"design":[4],"and":[5,43,104,108],"analyze":[6],"strategies":[7],"to":[8,19,67,75,102,127],"replicate":[9],"the":[10,30,61,90,121,133,147,156],"execution":[11,66,129],"of":[12,47,123,145],"an":[13],"application":[14],"on":[15,23],"two":[16],"different":[17,137],"platformssubject":[18],"failures,":[20],"using":[21],"checkpointing":[22,36,79,158],"a":[24,34,59,76,82,94,124,142],"shared":[25],"stable":[26],"storage.":[27],"We":[28,96],"derive":[29],"optimal":[31,105],"pattern":[32,57,106],"size~$W$for":[33],"periodic":[35,157],"strategy":[37,74,159],"where":[38,81],"both":[39],"platforms":[40,134],"concurrently":[41],"try":[42],"execute":[44],"$W$":[45],"units":[46],"work":[48],"before":[49],"checkpointing.":[50],"The":[51,118],"first":[52,98],"platform":[53,63,92,126,165],"that":[54,70,155],"completes":[55],"its":[56,65],"takes":[58],"checkpoint,and":[60],"other":[62,91],"interrupts":[64],"synchronize":[68],"from":[69],"checkpoint.We":[71],"compare":[72],"simpler":[77],"on-failure":[78],"strategy,":[80],"checkpoint":[83],"is":[84,149,160],"taken":[85],"by":[86,151],"one":[87],"platformonly":[88],"whenever":[89],"encounters":[93],"failure.":[95],"use":[97],"or":[99],"second-order":[100],"approximations":[101],"computeoverheads":[103],"sizes,":[107],"show":[109,120],"through":[110],"extensive":[111],"simulationsthat":[112],"these":[113],"models":[114],"are":[115,167],"very":[116],"accurate.":[117],"simulations":[119,153],"usefulness":[122],"secondary":[125],"reduce":[128],"time,":[130],"even":[131],"when":[132],"have":[135],"relatively":[136],"speeds:":[138],"in":[139],"average,":[140],"over":[141],"wide":[143],"range":[144],"scenarios,":[146],"overhead":[148],"reduced":[150],"$30\\%$.The":[152],"alsodemonstrate":[154],"globally":[161],"more":[162],"efficient,":[163],"unless":[164],"speeds":[166],"quite":[168],"close.":[169]},"counts_by_year":[{"year":2021,"cited_by_count":1},{"year":2020,"cited_by_count":1},{"year":2018,"cited_by_count":2}],"updated_date":"2026-02-13T15:27:49.765798","created_date":"2025-10-10T00:00:00"}
