{"id":"https://openalex.org/W2557645972","doi":"https://doi.org/10.1177/1094342016677085","title":"Building and utilizing fault tolerance support tools for the GASPI applications","display_name":"Building and utilizing fault tolerance support tools for the GASPI applications","publication_year":2016,"publication_date":"2016-11-28","ids":{"openalex":"https://openalex.org/W2557645972","doi":"https://doi.org/10.1177/1094342016677085","mag":"2557645972"},"language":"en","primary_location":{"id":"doi:10.1177/1094342016677085","is_oa":false,"landing_page_url":"https://doi.org/10.1177/1094342016677085","pdf_url":null,"source":{"id":"https://openalex.org/S60606485","display_name":"The International Journal of High Performance Computing Applications","issn_l":"1094-3420","issn":["1094-3420","1741-2846"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310320017","host_organization_name":"SAGE Publishing","host_organization_lineage":["https://openalex.org/P4310320017"],"host_organization_lineage_names":["SAGE Publishing"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"The International Journal of High Performance Computing Applications","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://open.fau.de/bitstreams/2129ffc4-be1a-433b-8115-6223f9e1bdc1/download","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5102889958","display_name":"Faisal Shahzad","orcid":"https://orcid.org/0000-0002-6766-7622"},"institutions":[{"id":"https://openalex.org/I181369854","display_name":"Friedrich-Alexander-Universit\u00e4t Erlangen-N\u00fcrnberg","ror":"https://ror.org/00f7hpc57","country_code":"DE","type":"education","lineage":["https://openalex.org/I181369854"]}],"countries":["DE"],"is_corresponding":true,"raw_author_name":"Faisal Shahzad","raw_affiliation_strings":["Erlangen Regional Computing Center, University of Erlangen\u2013Nuremberg Erlangen, Germany"],"affiliations":[{"raw_affiliation_string":"Erlangen Regional Computing Center, University of Erlangen\u2013Nuremberg Erlangen, Germany","institution_ids":["https://openalex.org/I181369854"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5038048025","display_name":"Moritz Kreutzer","orcid":"https://orcid.org/0000-0002-7822-9468"},"institutions":[{"id":"https://openalex.org/I181369854","display_name":"Friedrich-Alexander-Universit\u00e4t Erlangen-N\u00fcrnberg","ror":"https://ror.org/00f7hpc57","country_code":"DE","type":"education","lineage":["https://openalex.org/I181369854"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Moritz Kreutzer","raw_affiliation_strings":["Erlangen Regional Computing Center, University of Erlangen\u2013Nuremberg Erlangen, Germany"],"affiliations":[{"raw_affiliation_string":"Erlangen Regional Computing Center, University of Erlangen\u2013Nuremberg Erlangen, Germany","institution_ids":["https://openalex.org/I181369854"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5112049374","display_name":"Thomas Zeiser","orcid":"https://orcid.org/0009-0002-2916-911X"},"institutions":[{"id":"https://openalex.org/I181369854","display_name":"Friedrich-Alexander-Universit\u00e4t Erlangen-N\u00fcrnberg","ror":"https://ror.org/00f7hpc57","country_code":"DE","type":"education","lineage":["https://openalex.org/I181369854"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Thomas Zeiser","raw_affiliation_strings":["Erlangen Regional Computing Center, University of Erlangen\u2013Nuremberg Erlangen, Germany"],"affiliations":[{"raw_affiliation_string":"Erlangen Regional Computing Center, University of Erlangen\u2013Nuremberg Erlangen, Germany","institution_ids":["https://openalex.org/I181369854"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5088123154","display_name":"Rui Machado","orcid":"https://orcid.org/0009-0009-2759-2302"},"institutions":[{"id":"https://openalex.org/I3019415892","display_name":"Fraunhofer Institute for Industrial Mathematics","ror":"https://ror.org/019hjw009","country_code":"DE","type":"facility","lineage":["https://openalex.org/I3019415892","https://openalex.org/I4923324"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Rui Machado","raw_affiliation_strings":["Fraunhofer Institute for Industrial Mathematics (ITWM), Fraunhofer Platz 1, Kaiserslautern, Germany"],"affiliations":[{"raw_affiliation_string":"Fraunhofer Institute for Industrial Mathematics (ITWM), Fraunhofer Platz 1, Kaiserslautern, Germany","institution_ids":["https://openalex.org/I3019415892"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5066751418","display_name":"Andreas Pieper","orcid":"https://orcid.org/0000-0002-9054-7274"},"institutions":[{"id":"https://openalex.org/I36522303","display_name":"Universit\u00e4t Greifswald","ror":"https://ror.org/00r1edq15","country_code":"DE","type":"education","lineage":["https://openalex.org/I36522303"]},{"id":"https://openalex.org/I181369854","display_name":"Friedrich-Alexander-Universit\u00e4t Erlangen-N\u00fcrnberg","ror":"https://ror.org/00f7hpc57","country_code":"DE","type":"education","lineage":["https://openalex.org/I181369854"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Andreas Pieper","raw_affiliation_strings":["Erlangen Regional Computing Center, University of Erlangen\u2013Nuremberg Erlangen, Germany","Institute of Physics, University of Greifswald, Greifswald, Germany"],"affiliations":[{"raw_affiliation_string":"Erlangen Regional Computing Center, University of Erlangen\u2013Nuremberg Erlangen, Germany","institution_ids":["https://openalex.org/I181369854"]},{"raw_affiliation_string":"Institute of Physics, University of Greifswald, Greifswald, Germany","institution_ids":["https://openalex.org/I36522303"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5082552227","display_name":"Georg Hager","orcid":"https://orcid.org/0000-0002-8723-2781"},"institutions":[{"id":"https://openalex.org/I181369854","display_name":"Friedrich-Alexander-Universit\u00e4t Erlangen-N\u00fcrnberg","ror":"https://ror.org/00f7hpc57","country_code":"DE","type":"education","lineage":["https://openalex.org/I181369854"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Georg Hager","raw_affiliation_strings":["Erlangen Regional Computing Center, University of Erlangen\u2013Nuremberg Erlangen, Germany"],"affiliations":[{"raw_affiliation_string":"Erlangen Regional Computing Center, University of Erlangen\u2013Nuremberg Erlangen, Germany","institution_ids":["https://openalex.org/I181369854"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5070209050","display_name":"Gerhard Wellein","orcid":"https://orcid.org/0000-0001-7371-3026"},"institutions":[{"id":"https://openalex.org/I181369854","display_name":"Friedrich-Alexander-Universit\u00e4t Erlangen-N\u00fcrnberg","ror":"https://ror.org/00f7hpc57","country_code":"DE","type":"education","lineage":["https://openalex.org/I181369854"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Gerhard Wellein","raw_affiliation_strings":["Erlangen Regional Computing Center, University of Erlangen\u2013Nuremberg Erlangen, Germany"],"affiliations":[{"raw_affiliation_string":"Erlangen Regional Computing Center, University of Erlangen\u2013Nuremberg Erlangen, Germany","institution_ids":["https://openalex.org/I181369854"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":7,"corresponding_author_ids":["https://openalex.org/A5102889958"],"corresponding_institution_ids":["https://openalex.org/I181369854"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":2,"citation_normalized_percentile":{"value":0.15454166,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":94},"biblio":{"volume":"32","issue":"5","first_page":"613","last_page":"626"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10772","display_name":"Distributed systems and fault tolerance","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10772","display_name":"Distributed systems and fault tolerance","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.9993000030517578,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10715","display_name":"Distributed and Parallel Computing Systems","score":0.9983999729156494,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/fault-tolerance","display_name":"Fault tolerance","score":0.8156296014785767},{"id":"https://openalex.org/keywords/partitioned-global-address-space","display_name":"Partitioned global address space","score":0.7246394753456116},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7228394746780396},{"id":"https://openalex.org/keywords/software-fault-tolerance","display_name":"Software fault tolerance","score":0.55617356300354},{"id":"https://openalex.org/keywords/distributed-computing","display_name":"Distributed computing","score":0.554768443107605},{"id":"https://openalex.org/keywords/fault","display_name":"Fault (geology)","score":0.5007684230804443},{"id":"https://openalex.org/keywords/process","display_name":"Process (computing)","score":0.500014066696167},{"id":"https://openalex.org/keywords/solver","display_name":"Solver","score":0.47018754482269287},{"id":"https://openalex.org/keywords/embedded-system","display_name":"Embedded system","score":0.3642177879810333},{"id":"https://openalex.org/keywords/programming-paradigm","display_name":"Programming paradigm","score":0.24523749947547913},{"id":"https://openalex.org/keywords/operating-system","display_name":"Operating system","score":0.15074920654296875}],"concepts":[{"id":"https://openalex.org/C63540848","wikidata":"https://www.wikidata.org/wiki/Q3140932","display_name":"Fault tolerance","level":2,"score":0.8156296014785767},{"id":"https://openalex.org/C60832428","wikidata":"https://www.wikidata.org/wiki/Q945818","display_name":"Partitioned global address space","level":3,"score":0.7246394753456116},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7228394746780396},{"id":"https://openalex.org/C50712370","wikidata":"https://www.wikidata.org/wiki/Q4269346","display_name":"Software fault tolerance","level":3,"score":0.55617356300354},{"id":"https://openalex.org/C120314980","wikidata":"https://www.wikidata.org/wiki/Q180634","display_name":"Distributed computing","level":1,"score":0.554768443107605},{"id":"https://openalex.org/C175551986","wikidata":"https://www.wikidata.org/wiki/Q47089","display_name":"Fault (geology)","level":2,"score":0.5007684230804443},{"id":"https://openalex.org/C98045186","wikidata":"https://www.wikidata.org/wiki/Q205663","display_name":"Process (computing)","level":2,"score":0.500014066696167},{"id":"https://openalex.org/C2778770139","wikidata":"https://www.wikidata.org/wiki/Q1966904","display_name":"Solver","level":2,"score":0.47018754482269287},{"id":"https://openalex.org/C149635348","wikidata":"https://www.wikidata.org/wiki/Q193040","display_name":"Embedded system","level":1,"score":0.3642177879810333},{"id":"https://openalex.org/C34165917","wikidata":"https://www.wikidata.org/wiki/Q188267","display_name":"Programming paradigm","level":2,"score":0.24523749947547913},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.15074920654296875},{"id":"https://openalex.org/C165205528","wikidata":"https://www.wikidata.org/wiki/Q83371","display_name":"Seismology","level":1,"score":0.0},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0},{"id":"https://openalex.org/C127313418","wikidata":"https://www.wikidata.org/wiki/Q1069","display_name":"Geology","level":0,"score":0.0}],"mesh":[],"locations_count":4,"locations":[{"id":"doi:10.1177/1094342016677085","is_oa":false,"landing_page_url":"https://doi.org/10.1177/1094342016677085","pdf_url":null,"source":{"id":"https://openalex.org/S60606485","display_name":"The International Journal of High Performance Computing Applications","issn_l":"1094-3420","issn":["1094-3420","1741-2846"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310320017","host_organization_name":"SAGE Publishing","host_organization_lineage":["https://openalex.org/P4310320017"],"host_organization_lineage_names":["SAGE Publishing"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"The International Journal of High Performance Computing Applications","raw_type":"journal-article"},{"id":"pmh:oai:ub.uni-erlangen.de-opus:13540","is_oa":true,"landing_page_url":"https://nbn-resolving.org/urn:nbn:de:bvb:29-opus4-135406","pdf_url":"https://open.fau.de/bitstreams/2129ffc4-be1a-433b-8115-6223f9e1bdc1/download","source":{"id":"https://openalex.org/S4306401636","display_name":"OPUS Repository (Kooperativer Bibliotheksverbund Berlin-Brandenburg)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"article"},{"id":"pmh:oai:fraunhofer.de:N-429005","is_oa":false,"landing_page_url":"http://publica.fraunhofer.de/documents/N-429005.html","pdf_url":null,"source":{"id":"https://openalex.org/S4306400801","display_name":"Publikationsdatenbank der Fraunhofer-Gesellschaft (Fraunhofer-Gesellschaft)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I4923324","host_organization_name":"Fraunhofer-Gesellschaft","host_organization_lineage":["https://openalex.org/I4923324"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Fraunhofer ITWM","raw_type":"Journal Article"},{"id":"pmh:oai:publica.fraunhofer.de:publica/246661","is_oa":false,"landing_page_url":"https://publica.fraunhofer.de/handle/publica/246661","pdf_url":null,"source":{"id":"https://openalex.org/S4306400318","display_name":"Fraunhofer-Publica (Fraunhofer-Gesellschaft)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I4923324","host_organization_name":"Fraunhofer-Gesellschaft","host_organization_lineage":["https://openalex.org/I4923324"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"journal article"}],"best_oa_location":{"id":"pmh:oai:ub.uni-erlangen.de-opus:13540","is_oa":true,"landing_page_url":"https://nbn-resolving.org/urn:nbn:de:bvb:29-opus4-135406","pdf_url":"https://open.fau.de/bitstreams/2129ffc4-be1a-433b-8115-6223f9e1bdc1/download","source":{"id":"https://openalex.org/S4306401636","display_name":"OPUS Repository (Kooperativer Bibliotheksverbund Berlin-Brandenburg)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"article"},"sustainable_development_goals":[],"awards":[],"funders":[{"id":"https://openalex.org/F4320320879","display_name":"Deutsche Forschungsgemeinschaft","ror":"https://ror.org/018mejw64"},{"id":"https://openalex.org/F4320321114","display_name":"Bundesministerium f\u00fcr Bildung und Forschung","ror":"https://ror.org/04pz7b180"}],"has_content":{"pdf":true,"grobid_xml":true},"content_urls":{"pdf":"https://content.openalex.org/works/W2557645972.pdf","grobid_xml":"https://content.openalex.org/works/W2557645972.grobid-xml"},"referenced_works_count":25,"referenced_works":["https://openalex.org/W63848143","https://openalex.org/W141129880","https://openalex.org/W1484190925","https://openalex.org/W1884004174","https://openalex.org/W1956304975","https://openalex.org/W1971876223","https://openalex.org/W1981432246","https://openalex.org/W1986905947","https://openalex.org/W1992773065","https://openalex.org/W1998221613","https://openalex.org/W2000870360","https://openalex.org/W2037783202","https://openalex.org/W2044681268","https://openalex.org/W2056583598","https://openalex.org/W2066060292","https://openalex.org/W2067474224","https://openalex.org/W2084293824","https://openalex.org/W2089536264","https://openalex.org/W2117686912","https://openalex.org/W2128577831","https://openalex.org/W2133943294","https://openalex.org/W4239022315","https://openalex.org/W4245507143","https://openalex.org/W4249883881","https://openalex.org/W4388316717"],"related_works":["https://openalex.org/W2971479921","https://openalex.org/W3145923041","https://openalex.org/W2946906624","https://openalex.org/W841176518","https://openalex.org/W2101077206","https://openalex.org/W2157727563","https://openalex.org/W2470343202","https://openalex.org/W1978919910","https://openalex.org/W1488443159","https://openalex.org/W2005196107"],"abstract_inverted_index":{"Today\u2019s":[0],"high":[1],"performance":[2],"computing":[3],"systems":[4,26],"are":[5],"made":[6],"possible":[7],"by":[8,218],"multiple":[9],"increases":[10],"in":[11,16,49,129,176,221,230,254,270,274],"hardware":[12],"parallelity.":[13],"This":[14,117],"results":[15],"the":[17,25,50,89,94,104,112,123,130,206,209,213,227,231,238,251,278],"decrease":[18],"of":[19,24,44,52,97,106,127,132,171,215,226,233,276,285],"mean":[20],"time":[21],"to":[22,66,115,193],"failures":[23],"with":[27,68],"each":[28],"newer":[29],"generation,":[30],"which":[31,80],"is":[32,38,46,81,119,241,284],"an":[33],"alarming":[34],"trend.":[35],"Therefore,":[36],"it":[37],"not":[39,110],"surprising":[40],"that":[41,137],"a":[42,61,82,98,107,133,163,196,244],"lot":[43],"research":[45],"going":[47],"on":[48,88,121,208,243],"area":[51],"fault":[53,56,99,124,164,174,186,199,264],"tolerance":[54,125,175,187],"and":[55,281,289],"mitigation.":[57],"Applications":[58],"should":[59],"survive":[60],"failure":[62,105,145,261,279],"and/or":[63],"be":[64],"able":[65],"recover":[67,205],"minimal":[69],"cost.":[70],"We":[71,211,248],"have":[72,160],"used":[73],"Global":[74],"Address":[75],"Space":[76],"Programming":[77],"Interface":[78],"(GASPI),":[79],"relatively":[83],"new":[84],"communication":[85,101,152],"library":[86,136],"based":[87,242],"PGAS":[90],"model.":[91],"It":[92],"fulfills":[93],"basic":[95],"requirement":[96],"tolerant":[100,165],"library,":[102],"i.e.":[103],"process":[108],"does":[109],"cause":[111],"remaining":[113],"processes":[114],"fail.":[116],"work":[118],"focused":[120],"extending":[122],"features":[126,143],"GASPI":[128],"form":[131],"supporting":[134],"health-check":[135],"applications":[138,192,228],"can":[139,190],"benefit":[140],"from.":[141],"These":[142],"include":[144],"detection,":[146],"its":[147,157,177],"information":[148],"propagation,":[149],"recovery":[150,282],"management,":[151],"recovery,":[153],"etc.":[154],"To":[155],"reinforce":[156],"utility,":[158],"we":[159,180],"also":[161,249],"developed":[162],"neighbor":[166],"node-level":[167],"checkpoint/restart":[168],"library.":[169],"Instead":[170],"introducing":[172],"algorithm-based":[173],"true":[178],"sense,":[179],"demonstrate":[181],"how":[182],"(using":[183],"these":[184,216],"supplementary":[185],"functions)":[188],"one":[189],"build":[191],"allow":[194],"integrate":[195],"low":[197],"cost":[198,283],"detection/recovery":[200],"mechanism":[201,266],"and,":[202],"if":[203],"necessary,":[204],"application":[207,240],"fly.":[210],"showcase":[212],"usage":[214],"tools":[217],"implementing":[219],"them":[220],"three":[222],"different":[223],"applications.":[224],"Two":[225],"fall":[229],"category":[232],"linear":[234],"sparse":[235],"solvers,":[236],"whereas":[237,273],"third":[239],"fluid":[245],"flow":[246],"solver.":[247],"analyze":[250],"overheads":[252],"involved":[253],"failure-free":[255,271],"cases":[256],"as":[257,259],"well":[258],"various":[260],"cases.":[262],"Our":[263],"detection":[265,280],"causes":[267],"no":[268],"overhead":[269],"cases,":[272],"case":[275],"failure(s),":[277],"reasonably":[286],"acceptable":[287],"order":[288],"shows":[290],"good":[291],"scalability.":[292]},"counts_by_year":[{"year":2021,"cited_by_count":1},{"year":2020,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
