{"id":"https://openalex.org/W2150255127","doi":"https://doi.org/10.1109/ipdps.2008.4536302","title":"Scalable group-based checkpoint/restart for large-scale message-passing systems","display_name":"Scalable group-based checkpoint/restart for large-scale message-passing systems","publication_year":2008,"publication_date":"2008-04-01","ids":{"openalex":"https://openalex.org/W2150255127","doi":"https://doi.org/10.1109/ipdps.2008.4536302","mag":"2150255127"},"language":"en","primary_location":{"id":"doi:10.1109/ipdps.2008.4536302","is_oa":false,"landing_page_url":"https://doi.org/10.1109/ipdps.2008.4536302","pdf_url":null,"source":{"id":"https://openalex.org/S4210174069","display_name":"Proceedings - IEEE International Parallel and Distributed Processing Symposium","issn_l":"1530-2075","issn":["1530-2075"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2008 IEEE International Symposium on Parallel and Distributed Processing","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5100895298","display_name":"Justin C. Y. Ho","orcid":null},"institutions":[{"id":"https://openalex.org/I889458895","display_name":"University of Hong Kong","ror":"https://ror.org/02zhqgq86","country_code":"HK","type":"education","lineage":["https://openalex.org/I889458895"]}],"countries":["HK"],"is_corresponding":true,"raw_author_name":"Justin C. Y. Ho","raw_affiliation_strings":["Department of Computer Science, University of Hong Kong, Hong Kong, China"],"affiliations":[{"raw_affiliation_string":"Department of Computer Science, University of Hong Kong, Hong Kong, China","institution_ids":["https://openalex.org/I889458895"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5004156389","display_name":"Cho-Li Wang","orcid":null},"institutions":[{"id":"https://openalex.org/I889458895","display_name":"University of Hong Kong","ror":"https://ror.org/02zhqgq86","country_code":"HK","type":"education","lineage":["https://openalex.org/I889458895"]}],"countries":["HK"],"is_corresponding":false,"raw_author_name":"Cho-Li Wang","raw_affiliation_strings":["Department of Computer Science, University of Hong Kong, Hong Kong, China"],"affiliations":[{"raw_affiliation_string":"Department of Computer Science, University of Hong Kong, Hong Kong, China","institution_ids":["https://openalex.org/I889458895"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5102788789","display_name":"Francis C. M. Lau","orcid":"https://orcid.org/0000-0003-1082-9333"},"institutions":[{"id":"https://openalex.org/I889458895","display_name":"University of Hong Kong","ror":"https://ror.org/02zhqgq86","country_code":"HK","type":"education","lineage":["https://openalex.org/I889458895"]}],"countries":["HK"],"is_corresponding":false,"raw_author_name":"Francis C. M. Lau","raw_affiliation_strings":["Department of Computer Science, University of Hong Kong, Hong Kong, China"],"affiliations":[{"raw_affiliation_string":"Department of Computer Science, University of Hong Kong, Hong Kong, China","institution_ids":["https://openalex.org/I889458895"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5100895298"],"corresponding_institution_ids":["https://openalex.org/I889458895"],"apc_list":null,"apc_paid":null,"fwci":3.4305,"has_fulltext":false,"cited_by_count":28,"citation_normalized_percentile":{"value":0.93195614,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":89,"max":99},"biblio":{"volume":"iwdc 2005","issue":null,"first_page":"1","last_page":"12"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10772","display_name":"Distributed systems and fault tolerance","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10772","display_name":"Distributed systems and fault tolerance","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.9951000213623047,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11181","display_name":"Advanced Data Storage Technologies","score":0.9904999732971191,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8651485443115234},{"id":"https://openalex.org/keywords/scalability","display_name":"Scalability","score":0.852668046951294},{"id":"https://openalex.org/keywords/message-passing","display_name":"Message passing","score":0.7941056489944458},{"id":"https://openalex.org/keywords/fault-tolerance","display_name":"Fault tolerance","score":0.7001527547836304},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.6377156376838684},{"id":"https://openalex.org/keywords/communication-in-small-groups","display_name":"Communication in small groups","score":0.6110793948173523},{"id":"https://openalex.org/keywords/distributed-computing","display_name":"Distributed computing","score":0.6043587923049927},{"id":"https://openalex.org/keywords/implementation","display_name":"Implementation","score":0.5932549238204956},{"id":"https://openalex.org/keywords/blocking","display_name":"Blocking (statistics)","score":0.5155784487724304},{"id":"https://openalex.org/keywords/scale","display_name":"Scale (ratio)","score":0.46778595447540283},{"id":"https://openalex.org/keywords/operating-system","display_name":"Operating system","score":0.292116641998291},{"id":"https://openalex.org/keywords/computer-network","display_name":"Computer network","score":0.28530681133270264},{"id":"https://openalex.org/keywords/programming-language","display_name":"Programming language","score":0.06934428215026855}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8651485443115234},{"id":"https://openalex.org/C48044578","wikidata":"https://www.wikidata.org/wiki/Q727490","display_name":"Scalability","level":2,"score":0.852668046951294},{"id":"https://openalex.org/C854659","wikidata":"https://www.wikidata.org/wiki/Q1859284","display_name":"Message passing","level":2,"score":0.7941056489944458},{"id":"https://openalex.org/C63540848","wikidata":"https://www.wikidata.org/wiki/Q3140932","display_name":"Fault tolerance","level":2,"score":0.7001527547836304},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.6377156376838684},{"id":"https://openalex.org/C44871818","wikidata":"https://www.wikidata.org/wiki/Q5154139","display_name":"Communication in small groups","level":2,"score":0.6110793948173523},{"id":"https://openalex.org/C120314980","wikidata":"https://www.wikidata.org/wiki/Q180634","display_name":"Distributed computing","level":1,"score":0.6043587923049927},{"id":"https://openalex.org/C26713055","wikidata":"https://www.wikidata.org/wiki/Q245962","display_name":"Implementation","level":2,"score":0.5932549238204956},{"id":"https://openalex.org/C144745244","wikidata":"https://www.wikidata.org/wiki/Q4927286","display_name":"Blocking (statistics)","level":2,"score":0.5155784487724304},{"id":"https://openalex.org/C2778755073","wikidata":"https://www.wikidata.org/wiki/Q10858537","display_name":"Scale (ratio)","level":2,"score":0.46778595447540283},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.292116641998291},{"id":"https://openalex.org/C31258907","wikidata":"https://www.wikidata.org/wiki/Q1301371","display_name":"Computer network","level":1,"score":0.28530681133270264},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.06934428215026855},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0}],"mesh":[],"locations_count":3,"locations":[{"id":"doi:10.1109/ipdps.2008.4536302","is_oa":false,"landing_page_url":"https://doi.org/10.1109/ipdps.2008.4536302","pdf_url":null,"source":{"id":"https://openalex.org/S4210174069","display_name":"Proceedings - IEEE International Parallel and Distributed Processing Symposium","issn_l":"1530-2075","issn":["1530-2075"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2008 IEEE International Symposium on Parallel and Distributed Processing","raw_type":"proceedings-article"},{"id":"pmh:oai:CiteSeerX.psu:10.1.1.128.9027","is_oa":false,"landing_page_url":"http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.128.9027","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"http://www.cs.hku.hk/~clwang/papers/ipdps2008-GroupCKP.pdf","raw_type":"text"},{"id":"pmh:oai:hub.hku.hk:10722/93186","is_oa":false,"landing_page_url":"http://hdl.handle.net/10722/93186","pdf_url":null,"source":{"id":"https://openalex.org/S4377196271","display_name":"The HKU Scholars Hub (University of Hong Kong)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I889458895","host_organization_name":"University of Hong Kong","host_organization_lineage":["https://openalex.org/I889458895"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"Conference_Paper"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":13,"referenced_works":["https://openalex.org/W1607687639","https://openalex.org/W1997021720","https://openalex.org/W2045879521","https://openalex.org/W2052915895","https://openalex.org/W2095487435","https://openalex.org/W2100970777","https://openalex.org/W2113052966","https://openalex.org/W2116006827","https://openalex.org/W2116089112","https://openalex.org/W2131053137","https://openalex.org/W2158108598","https://openalex.org/W2164507994","https://openalex.org/W2170454619"],"related_works":["https://openalex.org/W2392835431","https://openalex.org/W2120447654","https://openalex.org/W2977179488","https://openalex.org/W2144453115","https://openalex.org/W2128223750","https://openalex.org/W4238532390","https://openalex.org/W2126932387","https://openalex.org/W2188872161","https://openalex.org/W2082659044","https://openalex.org/W2811255748"],"abstract_inverted_index":{"The":[0],"ever":[1],"increasing":[2],"number":[3],"of":[4,27,48,91],"processors":[5],"used":[6],"in":[7,15],"parallel":[8,17],"computers":[9],"is":[10,65,94],"making":[11],"fault":[12],"tolerance":[13],"support":[14],"large-scale":[16],"systems":[18],"more":[19,21],"and":[20,45,62,74,78],"important.":[22],"We":[23,40],"discuss":[24],"the":[25,36,42,88,92],"inadequacies":[26],"existing":[28],"system-level":[29],"checkpointing":[30,54,61],"solutions":[31],"for":[32],"message-passing":[33],"applications":[34],"as":[35],"system":[37],"scales":[38],"up.":[39],"analyze":[41,87],"coordination":[43],"cost":[44],"blocking":[46],"behavior":[47],"two":[49],"current":[50],"MPI":[51],"implementations":[52],"with":[53],"support.":[55],"A":[56],"group-based":[57],"solution":[58],"combining":[59],"coordinated":[60],"message":[63],"logging":[64],"then":[66],"proposed.":[67,95],"Experiment":[68],"results":[69],"demonstrate":[70],"its":[71],"better":[72],"performance":[73],"scalability":[75],"than":[76],"LAM/MPI":[77],"MPICH-VCL.":[79],"To":[80],"assist":[81],"group":[82],"formation,":[83],"a":[84],"method":[85],"to":[86],"communication":[89],"behaviors":[90],"application":[93]},"counts_by_year":[{"year":2022,"cited_by_count":1},{"year":2021,"cited_by_count":1},{"year":2020,"cited_by_count":2},{"year":2017,"cited_by_count":3},{"year":2015,"cited_by_count":2},{"year":2014,"cited_by_count":2},{"year":2013,"cited_by_count":2},{"year":2012,"cited_by_count":6}],"updated_date":"2026-04-05T17:49:38.594831","created_date":"2025-10-10T00:00:00"}
