{"id":"https://openalex.org/W2904466066","doi":"https://doi.org/10.1109/ftxs.2018.00005","title":"Fault Tolerant Cholesky Factorization on GPUs","display_name":"Fault Tolerant Cholesky Factorization on GPUs","publication_year":2018,"publication_date":"2018-11-01","ids":{"openalex":"https://openalex.org/W2904466066","doi":"https://doi.org/10.1109/ftxs.2018.00005","mag":"2904466066"},"language":"en","primary_location":{"id":"doi:10.1109/ftxs.2018.00005","is_oa":false,"landing_page_url":"https://doi.org/10.1109/ftxs.2018.00005","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2018 IEEE/ACM 8th Workshop on Fault Tolerance for HPC at eXtreme Scale (FTXS)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5033138738","display_name":"Felix Loh","orcid":"https://orcid.org/0000-0001-9288-6766"},"institutions":[{"id":"https://openalex.org/I135310074","display_name":"University of Wisconsin\u2013Madison","ror":"https://ror.org/01y2jtd41","country_code":"US","type":"education","lineage":["https://openalex.org/I135310074"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Felix Loh","raw_affiliation_strings":["University of Wisconsin-Madison, WI"],"affiliations":[{"raw_affiliation_string":"University of Wisconsin-Madison, WI","institution_ids":["https://openalex.org/I135310074"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5110218098","display_name":"Kewal K. Saluja","orcid":null},"institutions":[{"id":"https://openalex.org/I135310074","display_name":"University of Wisconsin\u2013Madison","ror":"https://ror.org/01y2jtd41","country_code":"US","type":"education","lineage":["https://openalex.org/I135310074"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Kewal K. Saluja","raw_affiliation_strings":["University of Wisconsin-Madison, WI"],"affiliations":[{"raw_affiliation_string":"University of Wisconsin-Madison, WI","institution_ids":["https://openalex.org/I135310074"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5101644702","display_name":"Parameswaran Ramanathan","orcid":"https://orcid.org/0000-0003-3457-139X"},"institutions":[{"id":"https://openalex.org/I135310074","display_name":"University of Wisconsin\u2013Madison","ror":"https://ror.org/01y2jtd41","country_code":"US","type":"education","lineage":["https://openalex.org/I135310074"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Parameswaran Ramanathan","raw_affiliation_strings":["University of Wisconsin-Madison, WI"],"affiliations":[{"raw_affiliation_string":"University of Wisconsin-Madison, WI","institution_ids":["https://openalex.org/I135310074"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5033138738"],"corresponding_institution_ids":["https://openalex.org/I135310074"],"apc_list":null,"apc_paid":null,"fwci":0.3691,"has_fulltext":false,"cited_by_count":2,"citation_normalized_percentile":{"value":0.66065676,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":94},"biblio":{"volume":null,"issue":null,"first_page":"11","last_page":"18"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10829","display_name":"Interconnection Networks and Systems","score":0.9991999864578247,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10829","display_name":"Interconnection Networks and Systems","score":0.9991999864578247,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.9972000122070312,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10101","display_name":"Cloud Computing and Resource Management","score":0.996399998664856,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/cholesky-decomposition","display_name":"Cholesky decomposition","score":0.9711254835128784},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8154663443565369},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.6246511340141296},{"id":"https://openalex.org/keywords/fault-tolerance","display_name":"Fault tolerance","score":0.6102602481842041},{"id":"https://openalex.org/keywords/minimum-degree-algorithm","display_name":"Minimum degree algorithm","score":0.577843189239502},{"id":"https://openalex.org/keywords/solver","display_name":"Solver","score":0.5584349036216736},{"id":"https://openalex.org/keywords/checksum","display_name":"Checksum","score":0.5315031409263611},{"id":"https://openalex.org/keywords/incomplete-cholesky-factorization","display_name":"Incomplete Cholesky factorization","score":0.4818050265312195},{"id":"https://openalex.org/keywords/computational-science","display_name":"Computational science","score":0.44742894172668457},{"id":"https://openalex.org/keywords/overhead","display_name":"Overhead (engineering)","score":0.4461183547973633},{"id":"https://openalex.org/keywords/graphics-processing-unit","display_name":"Graphics processing unit","score":0.4185506999492645},{"id":"https://openalex.org/keywords/distributed-computing","display_name":"Distributed computing","score":0.17831921577453613}],"concepts":[{"id":"https://openalex.org/C34727166","wikidata":"https://www.wikidata.org/wiki/Q515375","display_name":"Cholesky decomposition","level":3,"score":0.9711254835128784},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8154663443565369},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.6246511340141296},{"id":"https://openalex.org/C63540848","wikidata":"https://www.wikidata.org/wiki/Q3140932","display_name":"Fault tolerance","level":2,"score":0.6102602481842041},{"id":"https://openalex.org/C46085209","wikidata":"https://www.wikidata.org/wiki/Q17098969","display_name":"Minimum degree algorithm","level":5,"score":0.577843189239502},{"id":"https://openalex.org/C2778770139","wikidata":"https://www.wikidata.org/wiki/Q1966904","display_name":"Solver","level":2,"score":0.5584349036216736},{"id":"https://openalex.org/C162372511","wikidata":"https://www.wikidata.org/wiki/Q218341","display_name":"Checksum","level":2,"score":0.5315031409263611},{"id":"https://openalex.org/C44363057","wikidata":"https://www.wikidata.org/wiki/Q6015160","display_name":"Incomplete Cholesky factorization","level":4,"score":0.4818050265312195},{"id":"https://openalex.org/C459310","wikidata":"https://www.wikidata.org/wiki/Q117801","display_name":"Computational science","level":1,"score":0.44742894172668457},{"id":"https://openalex.org/C2779960059","wikidata":"https://www.wikidata.org/wiki/Q7113681","display_name":"Overhead (engineering)","level":2,"score":0.4461183547973633},{"id":"https://openalex.org/C2779851693","wikidata":"https://www.wikidata.org/wiki/Q183484","display_name":"Graphics processing unit","level":2,"score":0.4185506999492645},{"id":"https://openalex.org/C120314980","wikidata":"https://www.wikidata.org/wiki/Q180634","display_name":"Distributed computing","level":1,"score":0.17831921577453613},{"id":"https://openalex.org/C158693339","wikidata":"https://www.wikidata.org/wiki/Q190524","display_name":"Eigenvalues and eigenvectors","level":2,"score":0.0},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/ftxs.2018.00005","is_oa":false,"landing_page_url":"https://doi.org/10.1109/ftxs.2018.00005","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2018 IEEE/ACM 8th Workshop on Fault Tolerance for HPC at eXtreme Scale (FTXS)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":19,"referenced_works":["https://openalex.org/W1968912364","https://openalex.org/W1995746640","https://openalex.org/W2035080386","https://openalex.org/W2044829989","https://openalex.org/W2046332585","https://openalex.org/W2052455844","https://openalex.org/W2056109788","https://openalex.org/W2083613288","https://openalex.org/W2089024363","https://openalex.org/W2090495704","https://openalex.org/W2095078213","https://openalex.org/W2126315047","https://openalex.org/W2148039812","https://openalex.org/W2295431707","https://openalex.org/W2397636522","https://openalex.org/W2412349256","https://openalex.org/W2485331474","https://openalex.org/W2798909945","https://openalex.org/W4214549590"],"related_works":["https://openalex.org/W2039814159","https://openalex.org/W898189783","https://openalex.org/W2049943687","https://openalex.org/W2381435995","https://openalex.org/W2104481679","https://openalex.org/W2041033288","https://openalex.org/W2028693659","https://openalex.org/W3046276560","https://openalex.org/W2053627399","https://openalex.org/W2077700531"],"abstract_inverted_index":{"Direct":[0],"Cholesky-based":[1],"solvers":[2,20,34],"are":[3,58,70,102],"typically":[4],"used":[5,61],"to":[6,30,72],"solve":[7],"large":[8],"linear":[9,27],"systems":[10],"where":[11],"the":[12,89,115,129],"coefficient":[13],"matrix":[14],"is":[15],"symmetric":[16],"positive":[17],"definite.":[18],"These":[19],"offer":[21],"faster":[22],"performance":[23,122],"in":[24,66,128],"solving":[25],"such":[26,35,78],"systems,":[28],"compared":[29],"other":[31],"more":[32,96,98],"general":[33],"as":[36,62,79,95],"LU":[37],"and":[38,57,83,97,119,143,154,166],"QR":[39],"solvers.":[40],"In":[41,104],"recent":[42],"days,":[43],"graphics":[44],"processing":[45],"units":[46,65],"(GPUs)":[47],"have":[48,160],"become":[49],"a":[50,87,124,135,148],"popular":[51],"platform":[52],"for":[53,114,140],"scientific":[54],"computing":[55,100],"applications,":[56],"increasingly":[59],"being":[60],"major":[63],"computational":[64],"supercomputers.":[67],"However,":[68],"GPUs":[69,142],"susceptible":[71],"transient":[73,136],"faults":[74],"caused":[75],"by":[76],"events":[77],"alpha":[80],"particle":[81],"strikes":[82],"power":[84],"fluctuations.":[85],"As":[86],"result,":[88],"possibility":[90],"of":[91,131],"an":[92],"error":[93,164],"increases":[94],"GPU":[99],"nodes":[101],"used.":[103],"this":[105],"paper,":[106],"we":[107],"introduce":[108],"two":[109],"efficient":[110],"fault":[111,137,151],"tolerance":[112,152],"schemes":[113,146,159],"Cholesky":[116,126],"factorization":[117],"method,":[118],"study":[120],"their":[121],"using":[123],"direct":[125],"solver":[127],"presence":[130],"faults.":[132],"We":[133],"utilize":[134],"injection":[138],"mechanism":[139],"NVIDIA":[141],"compare":[144],"our":[145,157],"with":[147],"traditional":[149],"checksum":[150],"technique,":[153],"show":[155],"that":[156],"proposed":[158],"superior":[161],"performance,":[162],"good":[163],"coverage":[165],"low":[167],"overhead.":[168]},"counts_by_year":[{"year":2020,"cited_by_count":1},{"year":2018,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
