{"id":"https://openalex.org/W2137787140","doi":"https://doi.org/10.1177/1094342013505348","title":"Using group replication for resilience on exascale systems","display_name":"Using group replication for resilience on exascale systems","publication_year":2013,"publication_date":"2013-10-01","ids":{"openalex":"https://openalex.org/W2137787140","doi":"https://doi.org/10.1177/1094342013505348","mag":"2137787140"},"language":"en","primary_location":{"id":"doi:10.1177/1094342013505348","is_oa":false,"landing_page_url":"https://doi.org/10.1177/1094342013505348","pdf_url":null,"source":{"id":"https://openalex.org/S60606485","display_name":"The International Journal of High Performance Computing Applications","issn_l":"1094-3420","issn":["1094-3420","1741-2846"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310320017","host_organization_name":"SAGE Publishing","host_organization_lineage":["https://openalex.org/P4310320017"],"host_organization_lineage_names":["SAGE Publishing"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"The International Journal of High Performance Computing Applications","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://inria.hal.science/hal-00668016","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5062080875","display_name":"Marin Bougeret","orcid":"https://orcid.org/0000-0002-9910-4656"},"institutions":[{"id":"https://openalex.org/I4210099593","display_name":"Computer Algorithms for Medicine","ror":"https://ror.org/00zky6d38","country_code":"AT","type":"facility","lineage":["https://openalex.org/I4210099593"]},{"id":"https://openalex.org/I4210101743","display_name":"Laboratoire d'Informatique, de Robotique et de Micro\u00e9lectronique de Montpellier","ror":"https://ror.org/013yean28","country_code":"FR","type":"facility","lineage":["https://openalex.org/I1294671590","https://openalex.org/I1294671590","https://openalex.org/I1326498283","https://openalex.org/I151295451","https://openalex.org/I19894307","https://openalex.org/I4210101743","https://openalex.org/I4210159245","https://openalex.org/I4412460525"]}],"countries":["AT","FR"],"is_corresponding":false,"raw_author_name":"Marin Bougeret","raw_affiliation_strings":["LIRMM Montpellier, France","Methods, Algorithms for Operations REsearch"],"affiliations":[{"raw_affiliation_string":"LIRMM Montpellier, France","institution_ids":["https://openalex.org/I4210101743"]},{"raw_affiliation_string":"Methods, Algorithms for Operations REsearch","institution_ids":["https://openalex.org/I4210099593"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5022674890","display_name":"Henri Casanova","orcid":"https://orcid.org/0000-0001-6310-0365"},"institutions":[{"id":"https://openalex.org/I117965899","display_name":"University of Hawai\u02bbi at M\u0101noa","ror":"https://ror.org/01wspgy28","country_code":"US","type":"education","lineage":["https://openalex.org/I117965899"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Henri Casanova","raw_affiliation_strings":["University of Hawaii at Manoa, Honolulu, USA"],"affiliations":[{"raw_affiliation_string":"University of Hawaii at Manoa, Honolulu, USA","institution_ids":["https://openalex.org/I117965899"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5001838181","display_name":"Yves Robert","orcid":"https://orcid.org/0000-0003-2361-055X"},"institutions":[{"id":"https://openalex.org/I113428412","display_name":"\u00c9cole Normale Sup\u00e9rieure de Lyon","ror":"https://ror.org/04zmssz18","country_code":"FR","type":"education","lineage":["https://openalex.org/I113428412","https://openalex.org/I203339264"]},{"id":"https://openalex.org/I75027704","display_name":"University of Tennessee at Knoxville","ror":"https://ror.org/020f3ap87","country_code":"US","type":"education","lineage":["https://openalex.org/I75027704"]}],"countries":["FR","US"],"is_corresponding":false,"raw_author_name":"Yves Robert","raw_affiliation_strings":["Ecole Normale Sup\u00e9rieure de Lyon, France","University of Tennessee, Knoxville, USA"],"affiliations":[{"raw_affiliation_string":"Ecole Normale Sup\u00e9rieure de Lyon, France","institution_ids":["https://openalex.org/I113428412"]},{"raw_affiliation_string":"University of Tennessee, Knoxville, USA","institution_ids":["https://openalex.org/I75027704"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5086100728","display_name":"Fr\u00e9d\u00e9ric Vivien","orcid":"https://orcid.org/0000-0002-0663-6152"},"institutions":[{"id":"https://openalex.org/I1326498283","display_name":"Institut national de recherche en informatique et en automatique","ror":"https://ror.org/02kvxyf05","country_code":"FR","type":"funder","lineage":["https://openalex.org/I1326498283"]},{"id":"https://openalex.org/I113428412","display_name":"\u00c9cole Normale Sup\u00e9rieure de Lyon","ror":"https://ror.org/04zmssz18","country_code":"FR","type":"education","lineage":["https://openalex.org/I113428412","https://openalex.org/I203339264"]}],"countries":["FR"],"is_corresponding":false,"raw_author_name":"Fr\u00e9d\u00e9ric Vivien","raw_affiliation_strings":["Ecole Normale Sup\u00e9rieure de Lyon, France","INRIA, France"],"affiliations":[{"raw_affiliation_string":"Ecole Normale Sup\u00e9rieure de Lyon, France","institution_ids":["https://openalex.org/I113428412"]},{"raw_affiliation_string":"INRIA, France","institution_ids":["https://openalex.org/I1326498283"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5055988056","display_name":"Dounia Zaidouni","orcid":null},"institutions":[{"id":"https://openalex.org/I1326498283","display_name":"Institut national de recherche en informatique et en automatique","ror":"https://ror.org/02kvxyf05","country_code":"FR","type":"funder","lineage":["https://openalex.org/I1326498283"]},{"id":"https://openalex.org/I113428412","display_name":"\u00c9cole Normale Sup\u00e9rieure de Lyon","ror":"https://ror.org/04zmssz18","country_code":"FR","type":"education","lineage":["https://openalex.org/I113428412","https://openalex.org/I203339264"]}],"countries":["FR"],"is_corresponding":true,"raw_author_name":"Dounia Zaidouni","raw_affiliation_strings":["Ecole Normale Sup\u00e9rieure de Lyon, France","INRIA, France"],"affiliations":[{"raw_affiliation_string":"Ecole Normale Sup\u00e9rieure de Lyon, France","institution_ids":["https://openalex.org/I113428412"]},{"raw_affiliation_string":"INRIA, France","institution_ids":["https://openalex.org/I1326498283"]}]}],"institutions":[],"countries_distinct_count":3,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5055988056"],"corresponding_institution_ids":["https://openalex.org/I113428412","https://openalex.org/I1326498283"],"apc_list":null,"apc_paid":null,"fwci":3.0225,"has_fulltext":false,"cited_by_count":22,"citation_normalized_percentile":{"value":0.92297086,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":89,"max":98},"biblio":{"volume":"28","issue":"2","first_page":"210","last_page":"224"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10772","display_name":"Distributed systems and fault tolerance","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10772","display_name":"Distributed systems and fault tolerance","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.9883999824523926,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/replication","display_name":"Replication (statistics)","score":0.8202025294303894},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.808164656162262},{"id":"https://openalex.org/keywords/fault-tolerance","display_name":"Fault tolerance","score":0.7396707534790039},{"id":"https://openalex.org/keywords/overhead","display_name":"Overhead (engineering)","score":0.6989932060241699},{"id":"https://openalex.org/keywords/distributed-computing","display_name":"Distributed computing","score":0.630404531955719},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.5975673198699951},{"id":"https://openalex.org/keywords/resilience","display_name":"Resilience (materials science)","score":0.5536304116249084},{"id":"https://openalex.org/keywords/exponential-function","display_name":"Exponential function","score":0.43217504024505615},{"id":"https://openalex.org/keywords/upper-and-lower-bounds","display_name":"Upper and lower bounds","score":0.42461469769477844},{"id":"https://openalex.org/keywords/operating-system","display_name":"Operating system","score":0.13958412408828735},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.09388870000839233}],"concepts":[{"id":"https://openalex.org/C12590798","wikidata":"https://www.wikidata.org/wiki/Q3933199","display_name":"Replication (statistics)","level":2,"score":0.8202025294303894},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.808164656162262},{"id":"https://openalex.org/C63540848","wikidata":"https://www.wikidata.org/wiki/Q3140932","display_name":"Fault tolerance","level":2,"score":0.7396707534790039},{"id":"https://openalex.org/C2779960059","wikidata":"https://www.wikidata.org/wiki/Q7113681","display_name":"Overhead (engineering)","level":2,"score":0.6989932060241699},{"id":"https://openalex.org/C120314980","wikidata":"https://www.wikidata.org/wiki/Q180634","display_name":"Distributed computing","level":1,"score":0.630404531955719},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.5975673198699951},{"id":"https://openalex.org/C2779585090","wikidata":"https://www.wikidata.org/wiki/Q3457762","display_name":"Resilience (materials science)","level":2,"score":0.5536304116249084},{"id":"https://openalex.org/C151376022","wikidata":"https://www.wikidata.org/wiki/Q168698","display_name":"Exponential function","level":2,"score":0.43217504024505615},{"id":"https://openalex.org/C77553402","wikidata":"https://www.wikidata.org/wiki/Q13222579","display_name":"Upper and lower bounds","level":2,"score":0.42461469769477844},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.13958412408828735},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.09388870000839233},{"id":"https://openalex.org/C105795698","wikidata":"https://www.wikidata.org/wiki/Q12483","display_name":"Statistics","level":1,"score":0.0},{"id":"https://openalex.org/C134306372","wikidata":"https://www.wikidata.org/wiki/Q7754","display_name":"Mathematical analysis","level":1,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C97355855","wikidata":"https://www.wikidata.org/wiki/Q11473","display_name":"Thermodynamics","level":1,"score":0.0}],"mesh":[],"locations_count":4,"locations":[{"id":"doi:10.1177/1094342013505348","is_oa":false,"landing_page_url":"https://doi.org/10.1177/1094342013505348","pdf_url":null,"source":{"id":"https://openalex.org/S60606485","display_name":"The International Journal of High Performance Computing Applications","issn_l":"1094-3420","issn":["1094-3420","1741-2846"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310320017","host_organization_name":"SAGE Publishing","host_organization_lineage":["https://openalex.org/P4310320017"],"host_organization_lineage_names":["SAGE Publishing"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"The International Journal of High Performance Computing Applications","raw_type":"journal-article"},{"id":"pmh:oai:CiteSeerX.psu:10.1.1.232.6428","is_oa":false,"landing_page_url":"http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.232.6428","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"http://www.netlib.org/lapack/lawnspdf/lawn265.pdf","raw_type":"text"},{"id":"pmh:oai:HAL:hal-00668016v2","is_oa":true,"landing_page_url":"https://inria.hal.science/hal-00668016","pdf_url":null,"source":{"id":"https://openalex.org/S4306402512","display_name":"HAL (Le Centre pour la Communication Scientifique Directe)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1294671590","host_organization_name":"Centre National de la Recherche Scientifique","host_organization_lineage":["https://openalex.org/I1294671590"],"host_organization_lineage_names":[],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"[Research Report] RR-7876, INRIA. 2012","raw_type":"Reports"},{"id":"pmh:oai:HAL:hal-00881463v1","is_oa":true,"landing_page_url":"https://inria.hal.science/hal-00881463","pdf_url":null,"source":{"id":"https://openalex.org/S4306402512","display_name":"HAL (Le Centre pour la Communication Scientifique Directe)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1294671590","host_organization_name":"Centre National de la Recherche Scientifique","host_organization_lineage":["https://openalex.org/I1294671590"],"host_organization_lineage_names":[],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"http://hpc.sagepub.com/content/early/2013/09/30/1094342013505348","raw_type":"Journal articles"}],"best_oa_location":{"id":"pmh:oai:HAL:hal-00668016v2","is_oa":true,"landing_page_url":"https://inria.hal.science/hal-00668016","pdf_url":null,"source":{"id":"https://openalex.org/S4306402512","display_name":"HAL (Le Centre pour la Communication Scientifique Directe)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1294671590","host_organization_name":"Centre National de la Recherche Scientifique","host_organization_lineage":["https://openalex.org/I1294671590"],"host_organization_lineage_names":[],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"[Research Report] RR-7876, INRIA. 2012","raw_type":"Reports"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":39,"referenced_works":["https://openalex.org/W79276452","https://openalex.org/W94439627","https://openalex.org/W1558516248","https://openalex.org/W1870609547","https://openalex.org/W1969359541","https://openalex.org/W1993660990","https://openalex.org/W2014767141","https://openalex.org/W2016411419","https://openalex.org/W2031260715","https://openalex.org/W2033656974","https://openalex.org/W2062563466","https://openalex.org/W2063924830","https://openalex.org/W2064388050","https://openalex.org/W2081235423","https://openalex.org/W2089536264","https://openalex.org/W2098631346","https://openalex.org/W2102576651","https://openalex.org/W2109192777","https://openalex.org/W2117667603","https://openalex.org/W2119018856","https://openalex.org/W2122967269","https://openalex.org/W2127433432","https://openalex.org/W2131629153","https://openalex.org/W2133046454","https://openalex.org/W2150871235","https://openalex.org/W2161537327","https://openalex.org/W2164590718","https://openalex.org/W2165485434","https://openalex.org/W2169246841","https://openalex.org/W2169624732","https://openalex.org/W2914465576","https://openalex.org/W2990714382","https://openalex.org/W3148885254","https://openalex.org/W4231150350","https://openalex.org/W4240793564","https://openalex.org/W4242361160","https://openalex.org/W4246678129","https://openalex.org/W4256364678","https://openalex.org/W4285719527"],"related_works":["https://openalex.org/W4205713785","https://openalex.org/W3016766501","https://openalex.org/W2901033488","https://openalex.org/W2031325922","https://openalex.org/W1906576859","https://openalex.org/W1862835629","https://openalex.org/W2099111379","https://openalex.org/W2136799148","https://openalex.org/W2897533804","https://openalex.org/W2890506991"],"abstract_inverted_index":{"High":[0],"performance":[1],"computing":[2],"applications":[3],"must":[4,54],"be":[5,56],"resilient":[6],"to":[7,21,91,97,106,137,154,184],"faults.":[8],"The":[9],"traditional":[10],"fault-tolerance":[11,52],"solution":[12,166],"is":[13,19,61,169,198,202],"checkpoint-recovery,":[14],"by":[15],"which":[16],"application":[17,81,111,131,209],"state":[18],"saved":[20],"and":[22,108,191,210],"recovered":[23],"from":[24],"secondary":[25],"storage":[26],"throughout":[27],"execution.":[28],"It":[29],"has":[30],"been":[31],"shown":[32],"that,":[33],"even":[34],"when":[35,95],"using":[36,98],"an":[37,80,114,125,162],"optimal":[38],"checkpointing":[39,42,140,165,211],"strategy,":[40],"the":[41,68,129,185],"overhead":[43,212],"precludes":[44],"high":[45],"parallel":[46,93],"efficiency":[47,94],"at":[48,101],"large":[49,102],"scale.":[50,103],"Additional":[51],"mechanisms":[53],"thus":[55],"used.":[57],"Such":[58],"a":[59,73,138,150,172,205],"mechanism":[60],"replication,":[62],"that":[63,72,142,199],"is,":[64],"multiple":[65,110],"processors":[66],"performing":[67],"same":[69],"computation":[70],"so":[71],"processor":[74],"failure":[75,193],"does":[76],"not":[77],"necessarily":[78],"imply":[79],"failure.":[82],"In":[83],"spite":[84],"of":[85,207],"resource":[86],"waste,":[87],"replication":[88,201],"can":[89],"lead":[90],"higher":[92],"compared":[96],"only":[99],"checkpoint-recovery":[100],"We":[104],"propose":[105,149],"execute":[107],"checkpoint":[109,157],"instances":[112],"concurrently,":[113],"approach":[115],"we":[116,123,143,148,177],"term":[117],"group":[118,200],"replication.":[119],"For":[120,145],"exponential":[121,190],"failures":[122],"give":[124],"upper":[126],"bound":[127,135],"on":[128],"expected":[130],"execution":[132],"time.":[133],"This":[134],"corresponds":[136],"particular":[139],"period":[141,168],"derive.":[144],"general":[146],"failures,":[147],"dynamic":[151],"programming":[152],"algorithm":[153],"determine":[155],"non-periodic":[156],"dates":[158],"as":[159,161],"well":[160],"empirical":[163],"periodic":[164],"whose":[167],"found":[170],"via":[171],"numerical":[173],"search.":[174],"Using":[175],"simulation":[176],"evaluate":[178],"our":[179],"proposed":[180],"approaches,":[181],"including":[182],"comparison":[183],"non-replication":[186],"case,":[187],"for":[188,214],"both":[189],"Weibull":[192],"distributions.":[194],"Our":[195],"broad":[196],"finding":[197],"useful":[203],"in":[204],"range":[206],"realistic":[208],"scenarios":[213],"future":[215],"exascale":[216],"platforms.":[217]},"counts_by_year":[{"year":2025,"cited_by_count":1},{"year":2023,"cited_by_count":2},{"year":2022,"cited_by_count":1},{"year":2021,"cited_by_count":2},{"year":2020,"cited_by_count":1},{"year":2018,"cited_by_count":1},{"year":2017,"cited_by_count":4},{"year":2016,"cited_by_count":2},{"year":2015,"cited_by_count":2},{"year":2013,"cited_by_count":4},{"year":2012,"cited_by_count":2}],"updated_date":"2026-04-04T16:13:02.066488","created_date":"2025-10-10T00:00:00"}
