{"id":"https://openalex.org/W1978807861","doi":"https://doi.org/10.1145/2184512.2184574","title":"Application monitoring and checkpointing in HPC","display_name":"Application monitoring and checkpointing in HPC","publication_year":2012,"publication_date":"2012-03-29","ids":{"openalex":"https://openalex.org/W1978807861","doi":"https://doi.org/10.1145/2184512.2184574","mag":"1978807861"},"language":"en","primary_location":{"id":"doi:10.1145/2184512.2184574","is_oa":false,"landing_page_url":"https://doi.org/10.1145/2184512.2184574","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 50th Annual Southeast Regional Conference","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5062187807","display_name":"William M. Jones","orcid":"https://orcid.org/0000-0002-3796-6330"},"institutions":[{"id":"https://openalex.org/I208081647","display_name":"Coastal Carolina University","ror":"https://ror.org/01621q256","country_code":"US","type":"education","lineage":["https://openalex.org/I208081647"]},{"id":"https://openalex.org/I4210152127","display_name":"Conway School of Landscape Design","ror":"https://ror.org/04q7y8a54","country_code":"US","type":"education","lineage":["https://openalex.org/I4210152127"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"William M. Jones","raw_affiliation_strings":["Coastal Carolina University, Conway, SC"],"affiliations":[{"raw_affiliation_string":"Coastal Carolina University, Conway, SC","institution_ids":["https://openalex.org/I208081647","https://openalex.org/I4210152127"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5025457268","display_name":"John T. Daly","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"John T. Daly","raw_affiliation_strings":["Center for Exceptional Computing, ACS, Fort Meade, MD"],"affiliations":[{"raw_affiliation_string":"Center for Exceptional Computing, ACS, Fort Meade, MD","institution_ids":[]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5056569157","display_name":"Nathan DeBardeleben","orcid":"https://orcid.org/0000-0002-5593-9205"},"institutions":[{"id":"https://openalex.org/I1343871089","display_name":"Los Alamos National Laboratory","ror":"https://ror.org/01e41cf67","country_code":"US","type":"facility","lineage":["https://openalex.org/I1330989302","https://openalex.org/I1343871089","https://openalex.org/I198811213","https://openalex.org/I4210120050"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Nathan DeBardeleben","raw_affiliation_strings":["High Performance Computing, Los Alamos National Laboratory, Los Alamos, MN"],"affiliations":[{"raw_affiliation_string":"High Performance Computing, Los Alamos National Laboratory, Los Alamos, MN","institution_ids":["https://openalex.org/I1343871089"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5062187807"],"corresponding_institution_ids":["https://openalex.org/I208081647","https://openalex.org/I4210152127"],"apc_list":null,"apc_paid":null,"fwci":3.546,"has_fulltext":false,"cited_by_count":27,"citation_normalized_percentile":{"value":0.92890761,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":89,"max":99},"biblio":{"volume":null,"issue":null,"first_page":"262","last_page":"267"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10772","display_name":"Distributed systems and fault tolerance","score":0.9995999932289124,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10772","display_name":"Distributed systems and fault tolerance","score":0.9995999932289124,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10101","display_name":"Cloud Computing and Resource Management","score":0.9988999962806702,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10715","display_name":"Distributed and Parallel Computing Systems","score":0.9987999796867371,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7856680154800415},{"id":"https://openalex.org/keywords/fault-tolerance","display_name":"Fault tolerance","score":0.7454886436462402},{"id":"https://openalex.org/keywords/resilience","display_name":"Resilience (materials science)","score":0.7108925580978394},{"id":"https://openalex.org/keywords/reliability","display_name":"Reliability (semiconductor)","score":0.6790905594825745},{"id":"https://openalex.org/keywords/supercomputer","display_name":"Supercomputer","score":0.6348356008529663},{"id":"https://openalex.org/keywords/distributed-computing","display_name":"Distributed computing","score":0.6051103472709656},{"id":"https://openalex.org/keywords/cluster","display_name":"Cluster (spacecraft)","score":0.539165198802948},{"id":"https://openalex.org/keywords/reliability-engineering","display_name":"Reliability engineering","score":0.46032774448394775},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.45642396807670593},{"id":"https://openalex.org/keywords/computer-cluster","display_name":"Computer cluster","score":0.4328416585922241},{"id":"https://openalex.org/keywords/computational-complexity-theory","display_name":"Computational complexity theory","score":0.4277970492839813},{"id":"https://openalex.org/keywords/mechanism","display_name":"Mechanism (biology)","score":0.4157858192920685},{"id":"https://openalex.org/keywords/operating-system","display_name":"Operating system","score":0.2833014726638794},{"id":"https://openalex.org/keywords/engineering","display_name":"Engineering","score":0.1025255024433136},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.08433744311332703}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7856680154800415},{"id":"https://openalex.org/C63540848","wikidata":"https://www.wikidata.org/wiki/Q3140932","display_name":"Fault tolerance","level":2,"score":0.7454886436462402},{"id":"https://openalex.org/C2779585090","wikidata":"https://www.wikidata.org/wiki/Q3457762","display_name":"Resilience (materials science)","level":2,"score":0.7108925580978394},{"id":"https://openalex.org/C43214815","wikidata":"https://www.wikidata.org/wiki/Q7310987","display_name":"Reliability (semiconductor)","level":3,"score":0.6790905594825745},{"id":"https://openalex.org/C83283714","wikidata":"https://www.wikidata.org/wiki/Q121117","display_name":"Supercomputer","level":2,"score":0.6348356008529663},{"id":"https://openalex.org/C120314980","wikidata":"https://www.wikidata.org/wiki/Q180634","display_name":"Distributed computing","level":1,"score":0.6051103472709656},{"id":"https://openalex.org/C164866538","wikidata":"https://www.wikidata.org/wiki/Q367351","display_name":"Cluster (spacecraft)","level":2,"score":0.539165198802948},{"id":"https://openalex.org/C200601418","wikidata":"https://www.wikidata.org/wiki/Q2193887","display_name":"Reliability engineering","level":1,"score":0.46032774448394775},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.45642396807670593},{"id":"https://openalex.org/C29140674","wikidata":"https://www.wikidata.org/wiki/Q206637","display_name":"Computer cluster","level":2,"score":0.4328416585922241},{"id":"https://openalex.org/C179799912","wikidata":"https://www.wikidata.org/wiki/Q205084","display_name":"Computational complexity theory","level":2,"score":0.4277970492839813},{"id":"https://openalex.org/C89611455","wikidata":"https://www.wikidata.org/wiki/Q6804646","display_name":"Mechanism (biology)","level":2,"score":0.4157858192920685},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.2833014726638794},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.1025255024433136},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.08433744311332703},{"id":"https://openalex.org/C163258240","wikidata":"https://www.wikidata.org/wiki/Q25342","display_name":"Power (physics)","level":2,"score":0.0},{"id":"https://openalex.org/C111472728","wikidata":"https://www.wikidata.org/wiki/Q9471","display_name":"Epistemology","level":1,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C97355855","wikidata":"https://www.wikidata.org/wiki/Q11473","display_name":"Thermodynamics","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/2184512.2184574","is_oa":false,"landing_page_url":"https://doi.org/10.1145/2184512.2184574","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 50th Annual Southeast Regional Conference","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"score":0.5099999904632568,"id":"https://metadata.un.org/sdg/7","display_name":"Affordable and clean energy"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":19,"referenced_works":["https://openalex.org/W1498586823","https://openalex.org/W1558516248","https://openalex.org/W1970958679","https://openalex.org/W1984564341","https://openalex.org/W1986400910","https://openalex.org/W1987945440","https://openalex.org/W2003379648","https://openalex.org/W2033656974","https://openalex.org/W2050122359","https://openalex.org/W2089536264","https://openalex.org/W2091327926","https://openalex.org/W2093736609","https://openalex.org/W2105524676","https://openalex.org/W2115497013","https://openalex.org/W2122364937","https://openalex.org/W2127433432","https://openalex.org/W2133046454","https://openalex.org/W2138128578","https://openalex.org/W2914870215"],"related_works":["https://openalex.org/W2384867379","https://openalex.org/W1906576859","https://openalex.org/W2525033434","https://openalex.org/W4280533024","https://openalex.org/W4300992253","https://openalex.org/W1942762218","https://openalex.org/W1985270856","https://openalex.org/W3101274117","https://openalex.org/W4298207756","https://openalex.org/W1966875563"],"abstract_inverted_index":{"As":[0],"computational":[1],"cluster":[2],"computers":[3],"rapidly":[4],"grow":[5],"in":[6,14,25,45],"both":[7],"size":[8],"and":[9,28],"complexity,":[10],"system":[11],"reliability":[12],"and,":[13],"particular,":[15],"application":[16,42],"resilience":[17],"have":[18],"become":[19],"increasingly":[20],"important":[21],"factors":[22],"to":[23],"consider":[24],"maintaining":[26],"efficiency":[27],"providing":[29,41],"improved":[30],"compute":[31],"performance":[32],"over":[33],"predecessor":[34],"systems.":[35],"One":[36],"commonly":[37],"used":[38],"mechanism":[39],"for":[40],"fault":[43],"tolerance":[44],"parallel":[46],"systems":[47],"is":[48],"the":[49],"use":[50],"of":[51],"checkpointing.":[52]},"counts_by_year":[{"year":2025,"cited_by_count":1},{"year":2024,"cited_by_count":2},{"year":2021,"cited_by_count":1},{"year":2019,"cited_by_count":7},{"year":2018,"cited_by_count":2},{"year":2017,"cited_by_count":3},{"year":2016,"cited_by_count":1},{"year":2015,"cited_by_count":3},{"year":2014,"cited_by_count":3},{"year":2013,"cited_by_count":4}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
