{"id":"https://openalex.org/W2755218711","doi":"https://doi.org/10.1109/hpcs.2017.73","title":"A Fault Tolerance Manager with Distributed Coordinated Checkpoints for Automatic Recovery","display_name":"A Fault Tolerance Manager with Distributed Coordinated Checkpoints for Automatic Recovery","publication_year":2017,"publication_date":"2017-07-01","ids":{"openalex":"https://openalex.org/W2755218711","doi":"https://doi.org/10.1109/hpcs.2017.73","mag":"2755218711"},"language":"en","primary_location":{"id":"doi:10.1109/hpcs.2017.73","is_oa":false,"landing_page_url":"https://doi.org/10.1109/hpcs.2017.73","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2017 International Conference on High Performance Computing &amp; Simulation (HPCS)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5072892292","display_name":"Jorge Villamayor","orcid":"https://orcid.org/0000-0002-1729-037X"},"institutions":[{"id":"https://openalex.org/I123044942","display_name":"Universitat Aut\u00f2noma de Barcelona","ror":"https://ror.org/052g8jq94","country_code":"ES","type":"education","lineage":["https://openalex.org/I123044942"]}],"countries":["ES"],"is_corresponding":true,"raw_author_name":"Jorge Villamayor","raw_affiliation_strings":["CAOS - Computer Architecture and Operating Systems, Universidad Aut\u00f3noma de Barcelona, Barcelona, Spain"],"affiliations":[{"raw_affiliation_string":"CAOS - Computer Architecture and Operating Systems, Universidad Aut\u00f3noma de Barcelona, Barcelona, Spain","institution_ids":["https://openalex.org/I123044942"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5051751511","display_name":"Dolores Rexachs","orcid":"https://orcid.org/0000-0001-5500-850X"},"institutions":[{"id":"https://openalex.org/I123044942","display_name":"Universitat Aut\u00f2noma de Barcelona","ror":"https://ror.org/052g8jq94","country_code":"ES","type":"education","lineage":["https://openalex.org/I123044942"]}],"countries":["ES"],"is_corresponding":false,"raw_author_name":"Dolores Rexachs","raw_affiliation_strings":["CAOS - Computer Architecture and Operating Systems, Universidad Aut\u00f3noma de Barcelona, Barcelona, Spain"],"affiliations":[{"raw_affiliation_string":"CAOS - Computer Architecture and Operating Systems, Universidad Aut\u00f3noma de Barcelona, Barcelona, Spain","institution_ids":["https://openalex.org/I123044942"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5086588481","display_name":"Emilio Luque","orcid":"https://orcid.org/0000-0002-2884-3232"},"institutions":[{"id":"https://openalex.org/I123044942","display_name":"Universitat Aut\u00f2noma de Barcelona","ror":"https://ror.org/052g8jq94","country_code":"ES","type":"education","lineage":["https://openalex.org/I123044942"]}],"countries":["ES"],"is_corresponding":false,"raw_author_name":"Emilio Luque","raw_affiliation_strings":["CAOS - Computer Architecture and Operating Systems, Universidad Aut\u00f3noma de Barcelona, Barcelona, Spain"],"affiliations":[{"raw_affiliation_string":"CAOS - Computer Architecture and Operating Systems, Universidad Aut\u00f3noma de Barcelona, Barcelona, Spain","institution_ids":["https://openalex.org/I123044942"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5072892292"],"corresponding_institution_ids":["https://openalex.org/I123044942"],"apc_list":null,"apc_paid":null,"fwci":0.4144,"has_fulltext":false,"cited_by_count":4,"citation_normalized_percentile":{"value":0.66302311,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":96},"biblio":{"volume":null,"issue":null,"first_page":"452","last_page":"459"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10772","display_name":"Distributed systems and fault tolerance","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10772","display_name":"Distributed systems and fault tolerance","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10101","display_name":"Cloud Computing and Resource Management","score":0.9959999918937683,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11181","display_name":"Advanced Data Storage Technologies","score":0.9947999715805054,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8508032560348511},{"id":"https://openalex.org/keywords/fault-tolerance","display_name":"Fault tolerance","score":0.7578125596046448},{"id":"https://openalex.org/keywords/distributed-computing","display_name":"Distributed computing","score":0.7171910405158997},{"id":"https://openalex.org/keywords/scalability","display_name":"Scalability","score":0.6984298825263977},{"id":"https://openalex.org/keywords/bottleneck","display_name":"Bottleneck","score":0.6981902122497559},{"id":"https://openalex.org/keywords/cloud-computing","display_name":"Cloud computing","score":0.6625112295150757},{"id":"https://openalex.org/keywords/high-availability","display_name":"High availability","score":0.5474902987480164},{"id":"https://openalex.org/keywords/node","display_name":"Node (physics)","score":0.5295988917350769},{"id":"https://openalex.org/keywords/interface","display_name":"Interface (matter)","score":0.4500657320022583},{"id":"https://openalex.org/keywords/computer-cluster","display_name":"Computer cluster","score":0.4406645894050598},{"id":"https://openalex.org/keywords/distributed-data-store","display_name":"Distributed data store","score":0.41761842370033264},{"id":"https://openalex.org/keywords/operating-system","display_name":"Operating system","score":0.36986058950424194},{"id":"https://openalex.org/keywords/embedded-system","display_name":"Embedded system","score":0.2900198698043823}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8508032560348511},{"id":"https://openalex.org/C63540848","wikidata":"https://www.wikidata.org/wiki/Q3140932","display_name":"Fault tolerance","level":2,"score":0.7578125596046448},{"id":"https://openalex.org/C120314980","wikidata":"https://www.wikidata.org/wiki/Q180634","display_name":"Distributed computing","level":1,"score":0.7171910405158997},{"id":"https://openalex.org/C48044578","wikidata":"https://www.wikidata.org/wiki/Q727490","display_name":"Scalability","level":2,"score":0.6984298825263977},{"id":"https://openalex.org/C2780513914","wikidata":"https://www.wikidata.org/wiki/Q18210350","display_name":"Bottleneck","level":2,"score":0.6981902122497559},{"id":"https://openalex.org/C79974875","wikidata":"https://www.wikidata.org/wiki/Q483639","display_name":"Cloud computing","level":2,"score":0.6625112295150757},{"id":"https://openalex.org/C65813073","wikidata":"https://www.wikidata.org/wiki/Q1622420","display_name":"High availability","level":2,"score":0.5474902987480164},{"id":"https://openalex.org/C62611344","wikidata":"https://www.wikidata.org/wiki/Q1062658","display_name":"Node (physics)","level":2,"score":0.5295988917350769},{"id":"https://openalex.org/C113843644","wikidata":"https://www.wikidata.org/wiki/Q901882","display_name":"Interface (matter)","level":4,"score":0.4500657320022583},{"id":"https://openalex.org/C29140674","wikidata":"https://www.wikidata.org/wiki/Q206637","display_name":"Computer cluster","level":2,"score":0.4406645894050598},{"id":"https://openalex.org/C24885549","wikidata":"https://www.wikidata.org/wiki/Q339678","display_name":"Distributed data store","level":2,"score":0.41761842370033264},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.36986058950424194},{"id":"https://openalex.org/C149635348","wikidata":"https://www.wikidata.org/wiki/Q193040","display_name":"Embedded system","level":1,"score":0.2900198698043823},{"id":"https://openalex.org/C66938386","wikidata":"https://www.wikidata.org/wiki/Q633538","display_name":"Structural engineering","level":1,"score":0.0},{"id":"https://openalex.org/C157915830","wikidata":"https://www.wikidata.org/wiki/Q2928001","display_name":"Bubble","level":2,"score":0.0},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.0},{"id":"https://openalex.org/C129307140","wikidata":"https://www.wikidata.org/wiki/Q6795880","display_name":"Maximum bubble pressure method","level":3,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/hpcs.2017.73","is_oa":false,"landing_page_url":"https://doi.org/10.1109/hpcs.2017.73","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2017 International Conference on High Performance Computing &amp; Simulation (HPCS)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"score":0.47999998927116394,"id":"https://metadata.un.org/sdg/9","display_name":"Industry, innovation and infrastructure"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":15,"referenced_works":["https://openalex.org/W1953398341","https://openalex.org/W1981432246","https://openalex.org/W1984564341","https://openalex.org/W2008323179","https://openalex.org/W2010958160","https://openalex.org/W2074787299","https://openalex.org/W2095725132","https://openalex.org/W2100970777","https://openalex.org/W2133046454","https://openalex.org/W2135310158","https://openalex.org/W2135372757","https://openalex.org/W2139244298","https://openalex.org/W2157198314","https://openalex.org/W2170454619","https://openalex.org/W6646345524"],"related_works":["https://openalex.org/W2363040373","https://openalex.org/W2103295733","https://openalex.org/W2808531585","https://openalex.org/W4233801908","https://openalex.org/W4385721312","https://openalex.org/W1538240937","https://openalex.org/W2350978943","https://openalex.org/W1532689837","https://openalex.org/W2383764498","https://openalex.org/W2204165862"],"abstract_inverted_index":{"Components":[0],"for":[1,78,105,171],"High":[2],"Performance":[3],"Computing":[4],"are":[5,36,116],"continuously":[6],"increasing":[7],"to":[8,55,84,119,125,135,169,186],"achieve":[9],"more":[10],"performance":[11],"and":[12,28,103,138,195,211],"satisfy":[13],"scientific":[14],"application":[15,111],"users":[16,86,106,167],"demands.":[17],"To":[18,23],"reduce":[19],"the":[20,51,67,98,146,150,201,204],"Mean":[21],"Time":[22],"Repair":[24],"in":[25,46,58,122,162,190,207],"these":[26],"systems":[27],"increment":[29],"high":[30],"availability,":[31],"Fault":[32,74],"Tolerance":[33,75],"(FT)":[34],"solutions":[35],"required.":[37],"The":[38],"checkpoint/restart":[39],"approach":[40,158,206],"is":[41,66,82,159,178],"a":[42,73,153,182,191,208,212],"widely":[43],"used":[44,53],"mechanism":[45],"FT":[47,101],"solutions.":[48],"One":[49],"of":[50,100,109,131,142,152,203],"most":[52],"technique":[54],"take":[56],"checkpoints":[57],"parallel":[59],"applications":[60],"implemented":[61],"using":[62],"Message":[63],"Passing":[64],"Interface":[65],"coordinated":[68,79],"checkpoints.":[69],"In":[70],"this":[71],"paper":[72],"Manager":[76],"(FTM)":[77],"checkpoint":[80],"files":[81],"presented,":[83],"provide":[85,187],"automatic":[87,194],"recovery":[88],"from":[89],"failures":[90],"when":[91],"losing":[92],"computing":[93,215],"nodes.":[94],"This":[95,157,176],"proposal":[96],"makes":[97],"configuration":[99],"simpler":[102],"transparent":[104],"without":[107],"knowledge":[108],"their":[110,123],"implementation.":[112],"Furthermore,":[113],"system":[114],"administrators":[115],"not":[117],"required":[118],"install":[120],"libraries":[121],"cluster":[124,210],"support":[126],"FTM.":[127],"It":[128],"takes":[129],"advantage":[130],"node":[132],"local":[133],"storage":[134,174],"save":[136],"checkpoints,":[137],"it":[139],"distributes":[140],"copies":[141],"them":[143],"along":[144],"all":[145],"computation":[147],"nodes,":[148],"avoiding":[149],"bottleneck":[151],"central":[154],"stable":[155,173],"storage.":[156],"particularly":[160],"useful":[161],"IaaS":[163],"cloud":[164,214],"environments,":[165],"where":[166],"have":[168],"pay":[170],"centralized":[172],"services.":[175],"work":[177],"based":[179],"on":[180],"RADIC,":[181],"well-":[183],"known":[184],"architecture":[185],"fault":[188],"tolerance":[189],"distributed,":[192],"flexible,":[193],"scalable":[196],"way.":[197],"Experimental":[198],"results":[199],"shows":[200],"benefits":[202],"presented":[205],"private":[209],"well-known":[213],"environment,":[216],"Amazon":[217],"EC2.":[218]},"counts_by_year":[{"year":2025,"cited_by_count":1},{"year":2021,"cited_by_count":1},{"year":2019,"cited_by_count":2}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
