{"id":"https://openalex.org/W2250671437","doi":"https://doi.org/10.1145/2831129.2831130","title":"Practical resilient cases for FA-MPI, a transactional fault-tolerant MPI","display_name":"Practical resilient cases for FA-MPI, a transactional fault-tolerant MPI","publication_year":2015,"publication_date":"2015-11-09","ids":{"openalex":"https://openalex.org/W2250671437","doi":"https://doi.org/10.1145/2831129.2831130","mag":"2250671437"},"language":"en","primary_location":{"id":"doi:10.1145/2831129.2831130","is_oa":false,"landing_page_url":"https://doi.org/10.1145/2831129.2831130","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 3rd Workshop on Exascale MPI","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5010796044","display_name":"Amin Hassani","orcid":null},"institutions":[{"id":"https://openalex.org/I32389192","display_name":"University of Alabama at Birmingham","ror":"https://ror.org/008s83205","country_code":"US","type":"education","lineage":["https://openalex.org/I32389192"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Amin Hassani","raw_affiliation_strings":["University of Alabama at Birmingham, Birmingham, AL"],"affiliations":[{"raw_affiliation_string":"University of Alabama at Birmingham, Birmingham, AL","institution_ids":["https://openalex.org/I32389192"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5026440046","display_name":"Anthony Skjellum","orcid":"https://orcid.org/0000-0001-5252-6600"},"institutions":[{"id":"https://openalex.org/I82497590","display_name":"Auburn University","ror":"https://ror.org/02v80fc35","country_code":"US","type":"education","lineage":["https://openalex.org/I82497590"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Anthony Skjellum","raw_affiliation_strings":["Auburn University, Auburn, AL"],"affiliations":[{"raw_affiliation_string":"Auburn University, Auburn, AL","institution_ids":["https://openalex.org/I82497590"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5084732782","display_name":"Purushotham Bangalore","orcid":"https://orcid.org/0000-0002-1098-9997"},"institutions":[{"id":"https://openalex.org/I32389192","display_name":"University of Alabama at Birmingham","ror":"https://ror.org/008s83205","country_code":"US","type":"education","lineage":["https://openalex.org/I32389192"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Purushotham V. Bangalore","raw_affiliation_strings":["University of Alabama at Birmingham, Birmingham, AL"],"affiliations":[{"raw_affiliation_string":"University of Alabama at Birmingham, Birmingham, AL","institution_ids":["https://openalex.org/I32389192"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5036922303","display_name":"Ron Brightwell","orcid":"https://orcid.org/0009-0009-8186-222X"},"institutions":[{"id":"https://openalex.org/I4210104735","display_name":"Sandia National Laboratories","ror":"https://ror.org/01apwpt12","country_code":"US","type":"facility","lineage":["https://openalex.org/I1330989302","https://openalex.org/I198811213","https://openalex.org/I4210104735"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Ron Brightwell","raw_affiliation_strings":["Sandia National Laboratories, Albuquerque, NM"],"affiliations":[{"raw_affiliation_string":"Sandia National Laboratories, Albuquerque, NM","institution_ids":["https://openalex.org/I4210104735"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5010796044"],"corresponding_institution_ids":["https://openalex.org/I32389192"],"apc_list":null,"apc_paid":null,"fwci":1.3313,"has_fulltext":false,"cited_by_count":8,"citation_normalized_percentile":{"value":0.84019907,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":97},"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"10"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10772","display_name":"Distributed systems and fault tolerance","score":0.9995999932289124,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10772","display_name":"Distributed systems and fault tolerance","score":0.9995999932289124,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T13471","display_name":"Cognitive Functions and Memory","score":0.951200008392334,"subfield":{"id":"https://openalex.org/subfields/3205","display_name":"Experimental and Cognitive Psychology"},"field":{"id":"https://openalex.org/fields/32","display_name":"Psychology"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T11181","display_name":"Advanced Data Storage Technologies","score":0.9312000274658203,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8279940485954285},{"id":"https://openalex.org/keywords/fault-tolerance","display_name":"Fault tolerance","score":0.7443854808807373},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.6795127391815186},{"id":"https://openalex.org/keywords/transactional-memory","display_name":"Transactional memory","score":0.5439867377281189},{"id":"https://openalex.org/keywords/transactional-leadership","display_name":"Transactional leadership","score":0.43639075756073},{"id":"https://openalex.org/keywords/message-passing-interface","display_name":"Message Passing Interface","score":0.414108008146286},{"id":"https://openalex.org/keywords/distributed-computing","display_name":"Distributed computing","score":0.3730407953262329},{"id":"https://openalex.org/keywords/message-passing","display_name":"Message passing","score":0.275688111782074},{"id":"https://openalex.org/keywords/programming-language","display_name":"Programming language","score":0.21067431569099426},{"id":"https://openalex.org/keywords/database-transaction","display_name":"Database transaction","score":0.12437677383422852},{"id":"https://openalex.org/keywords/psychology","display_name":"Psychology","score":0.044305503368377686}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8279940485954285},{"id":"https://openalex.org/C63540848","wikidata":"https://www.wikidata.org/wiki/Q3140932","display_name":"Fault tolerance","level":2,"score":0.7443854808807373},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.6795127391815186},{"id":"https://openalex.org/C134277064","wikidata":"https://www.wikidata.org/wiki/Q878206","display_name":"Transactional memory","level":3,"score":0.5439867377281189},{"id":"https://openalex.org/C68489960","wikidata":"https://www.wikidata.org/wiki/Q2370659","display_name":"Transactional leadership","level":2,"score":0.43639075756073},{"id":"https://openalex.org/C166782233","wikidata":"https://www.wikidata.org/wiki/Q127879","display_name":"Message Passing Interface","level":3,"score":0.414108008146286},{"id":"https://openalex.org/C120314980","wikidata":"https://www.wikidata.org/wiki/Q180634","display_name":"Distributed computing","level":1,"score":0.3730407953262329},{"id":"https://openalex.org/C854659","wikidata":"https://www.wikidata.org/wiki/Q1859284","display_name":"Message passing","level":2,"score":0.275688111782074},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.21067431569099426},{"id":"https://openalex.org/C75949130","wikidata":"https://www.wikidata.org/wiki/Q848010","display_name":"Database transaction","level":2,"score":0.12437677383422852},{"id":"https://openalex.org/C15744967","wikidata":"https://www.wikidata.org/wiki/Q9418","display_name":"Psychology","level":0,"score":0.044305503368377686},{"id":"https://openalex.org/C77805123","wikidata":"https://www.wikidata.org/wiki/Q161272","display_name":"Social psychology","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/2831129.2831130","is_oa":false,"landing_page_url":"https://doi.org/10.1145/2831129.2831130","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 3rd Workshop on Exascale MPI","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[{"id":"https://openalex.org/F4320306076","display_name":"National Science Foundation","ror":"https://ror.org/021nxhr62"},{"id":"https://openalex.org/F4320306084","display_name":"U.S. Department of Energy","ror":"https://ror.org/01bj3aw27"},{"id":"https://openalex.org/F4320309904","display_name":"Auburn University","ror":"https://ror.org/02v80fc35"},{"id":"https://openalex.org/F4320332369","display_name":"National Nuclear Security Administration","ror":"https://ror.org/03sk1we31"},{"id":"https://openalex.org/F4320338291","display_name":"Sandia National Laboratories","ror":"https://ror.org/01apwpt12"}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":21,"referenced_works":["https://openalex.org/W163470805","https://openalex.org/W1825216778","https://openalex.org/W1981432246","https://openalex.org/W1984564341","https://openalex.org/W1991421378","https://openalex.org/W1993505169","https://openalex.org/W1993660990","https://openalex.org/W2000870360","https://openalex.org/W2021234574","https://openalex.org/W2037208432","https://openalex.org/W2040661203","https://openalex.org/W2043701535","https://openalex.org/W2045271686","https://openalex.org/W2045879521","https://openalex.org/W2050989056","https://openalex.org/W2054129151","https://openalex.org/W2063924830","https://openalex.org/W2089536264","https://openalex.org/W2095954861","https://openalex.org/W2128577831","https://openalex.org/W2521708680"],"related_works":["https://openalex.org/W2069952143","https://openalex.org/W2177773059","https://openalex.org/W2533634483","https://openalex.org/W2520648950","https://openalex.org/W2159817808","https://openalex.org/W91323585","https://openalex.org/W2050539045","https://openalex.org/W4224084996","https://openalex.org/W2082659044","https://openalex.org/W3197250739"],"abstract_inverted_index":{"MPI":[0,13,35,128],"is":[1,66],"insufficient":[2],"when":[3],"confronting":[4],"failures.":[5],"FA-MPI":[6,26,115],"(Fault-Aware":[7],"MPI)":[8],"provides":[9],"extensions":[10,86],"to":[11,16,20,33,88,98,100,117],"the":[12,34],"standard":[14],"designed":[15],"enable":[17],"data-parallel":[18],"applications":[19,97],"achieve":[21,50],"resilience":[22],"without":[23],"sacrificing":[24],"scalability.":[25],"introduces":[27],"transactions":[28],"as":[29],"a":[30,78],"novel":[31],"extension":[32],"message-passing":[36],"model.":[37],"Transactions":[38],"support":[39],"failure":[40],"detection,":[41],"isolation,":[42],"mitigation,":[43],"and":[44,59,91,112,123],"recovery":[45],"via":[46],"application-driven":[47],"policies.":[48],"To":[49],"maximum":[51],"achievable":[52],"performance":[53,122],"of":[54,67,80],"modern":[55],"machines,":[56],"overlapping":[57],"communication":[58,75],"I/O":[60],"with":[61,102],"computation":[62],"through":[63],"non-blocking":[64,74],"operations":[65,76],"growing":[68],"importance.":[69],"Therefore,":[70],"we":[71,120],"emphasize":[72],"fault-tolerant,":[73],"plus":[77],"set":[79],"nestable":[81],"lightweight":[82],"transactional":[83],"TryBlock":[84],"API":[85],"able":[87],"exploit":[89],"system":[90],"application":[92],"hierarchy.":[93],"This":[94],"strategy":[95],"enables":[96],"run":[99],"completion":[101],"higher":[103],"probability":[104],"than":[105],"nominally.":[106],"We":[107],"modified":[108],"two":[109],"proxy":[110],"applications---MiniFE":[111],"LULESH---by":[113],"adding":[114],"semantics":[116],"them.":[118],"Finally":[119],"present":[121],"overhead":[124],"results":[125],"for":[126],"1K":[127],"processes.":[129]},"counts_by_year":[{"year":2022,"cited_by_count":1},{"year":2020,"cited_by_count":3},{"year":2018,"cited_by_count":1},{"year":2017,"cited_by_count":2},{"year":2016,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
