{"id":"https://openalex.org/W2123500455","doi":"https://doi.org/10.1145/2145816.2145845","title":"Algorithm-based fault tolerance for dense matrix factorizations","display_name":"Algorithm-based fault tolerance for dense matrix factorizations","publication_year":2012,"publication_date":"2012-02-25","ids":{"openalex":"https://openalex.org/W2123500455","doi":"https://doi.org/10.1145/2145816.2145845","mag":"2123500455"},"language":"en","primary_location":{"id":"doi:10.1145/2145816.2145845","is_oa":false,"landing_page_url":"https://doi.org/10.1145/2145816.2145845","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 17th ACM SIGPLAN symposium on Principles and Practice of Parallel Programming","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5100620408","display_name":"Peng Du","orcid":"https://orcid.org/0000-0002-8257-8230"},"institutions":[{"id":"https://openalex.org/I75027704","display_name":"University of Tennessee at Knoxville","ror":"https://ror.org/020f3ap87","country_code":"US","type":"education","lineage":["https://openalex.org/I75027704"]},{"id":"https://openalex.org/I2802706902","display_name":"Knoxville College","ror":"https://ror.org/02bxrp522","country_code":"US","type":"education","lineage":["https://openalex.org/I2802706902"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Peng Du","raw_affiliation_strings":["University of Tennessee, Knoxville, Knoxville, TN, USA","University of Tennessee-Knoxville, Knoxville, TN, USA"],"affiliations":[{"raw_affiliation_string":"University of Tennessee, Knoxville, Knoxville, TN, USA","institution_ids":["https://openalex.org/I2802706902","https://openalex.org/I75027704"]},{"raw_affiliation_string":"University of Tennessee-Knoxville, Knoxville, TN, USA","institution_ids":["https://openalex.org/I2802706902","https://openalex.org/I75027704"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5054873210","display_name":"Aur\u00e9lien Bouteiller","orcid":"https://orcid.org/0000-0001-5108-509X"},"institutions":[{"id":"https://openalex.org/I75027704","display_name":"University of Tennessee at Knoxville","ror":"https://ror.org/020f3ap87","country_code":"US","type":"education","lineage":["https://openalex.org/I75027704"]},{"id":"https://openalex.org/I2802706902","display_name":"Knoxville College","ror":"https://ror.org/02bxrp522","country_code":"US","type":"education","lineage":["https://openalex.org/I2802706902"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Aurelien Bouteiller","raw_affiliation_strings":["University of Tennessee, Knoxville, Knoxville, TN, USA","University of Tennessee-Knoxville, Knoxville, TN, USA"],"affiliations":[{"raw_affiliation_string":"University of Tennessee, Knoxville, Knoxville, TN, USA","institution_ids":["https://openalex.org/I2802706902","https://openalex.org/I75027704"]},{"raw_affiliation_string":"University of Tennessee-Knoxville, Knoxville, TN, USA","institution_ids":["https://openalex.org/I2802706902","https://openalex.org/I75027704"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5010055736","display_name":"George Bosilca","orcid":"https://orcid.org/0000-0003-2411-8495"},"institutions":[{"id":"https://openalex.org/I75027704","display_name":"University of Tennessee at Knoxville","ror":"https://ror.org/020f3ap87","country_code":"US","type":"education","lineage":["https://openalex.org/I75027704"]},{"id":"https://openalex.org/I2802706902","display_name":"Knoxville College","ror":"https://ror.org/02bxrp522","country_code":"US","type":"education","lineage":["https://openalex.org/I2802706902"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"George Bosilca","raw_affiliation_strings":["University of Tennessee, Knoxville, Knoxville, TN, USA","University of Tennessee-Knoxville, Knoxville, TN, USA"],"affiliations":[{"raw_affiliation_string":"University of Tennessee, Knoxville, Knoxville, TN, USA","institution_ids":["https://openalex.org/I2802706902","https://openalex.org/I75027704"]},{"raw_affiliation_string":"University of Tennessee-Knoxville, Knoxville, TN, USA","institution_ids":["https://openalex.org/I2802706902","https://openalex.org/I75027704"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5008117654","display_name":"Thomas H\u00e9rault","orcid":"https://orcid.org/0000-0001-6756-6189"},"institutions":[{"id":"https://openalex.org/I75027704","display_name":"University of Tennessee at Knoxville","ror":"https://ror.org/020f3ap87","country_code":"US","type":"education","lineage":["https://openalex.org/I75027704"]},{"id":"https://openalex.org/I2802706902","display_name":"Knoxville College","ror":"https://ror.org/02bxrp522","country_code":"US","type":"education","lineage":["https://openalex.org/I2802706902"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Thomas Herault","raw_affiliation_strings":["University of Tennessee, Knoxville, Knoxville, TN, USA","University of Tennessee-Knoxville, Knoxville, TN, USA"],"affiliations":[{"raw_affiliation_string":"University of Tennessee, Knoxville, Knoxville, TN, USA","institution_ids":["https://openalex.org/I2802706902","https://openalex.org/I75027704"]},{"raw_affiliation_string":"University of Tennessee-Knoxville, Knoxville, TN, USA","institution_ids":["https://openalex.org/I2802706902","https://openalex.org/I75027704"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5075517045","display_name":"Jack Dongarra","orcid":"https://orcid.org/0000-0003-3247-1782"},"institutions":[{"id":"https://openalex.org/I75027704","display_name":"University of Tennessee at Knoxville","ror":"https://ror.org/020f3ap87","country_code":"US","type":"education","lineage":["https://openalex.org/I75027704"]},{"id":"https://openalex.org/I2802706902","display_name":"Knoxville College","ror":"https://ror.org/02bxrp522","country_code":"US","type":"education","lineage":["https://openalex.org/I2802706902"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Jack Dongarra","raw_affiliation_strings":["University of Tennessee, Knoxville, Knoxville, TN, USA","University of Tennessee-Knoxville, Knoxville, TN, USA"],"affiliations":[{"raw_affiliation_string":"University of Tennessee, Knoxville, Knoxville, TN, USA","institution_ids":["https://openalex.org/I2802706902","https://openalex.org/I75027704"]},{"raw_affiliation_string":"University of Tennessee-Knoxville, Knoxville, TN, USA","institution_ids":["https://openalex.org/I2802706902","https://openalex.org/I75027704"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5100620408"],"corresponding_institution_ids":["https://openalex.org/I2802706902","https://openalex.org/I75027704"],"apc_list":null,"apc_paid":null,"fwci":20.8614,"has_fulltext":false,"cited_by_count":108,"citation_normalized_percentile":{"value":0.99553445,"is_in_top_1_percent":true,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":89,"max":100},"biblio":{"volume":null,"issue":null,"first_page":"225","last_page":"234"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10772","display_name":"Distributed systems and fault tolerance","score":0.9994999766349792,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10772","display_name":"Distributed systems and fault tolerance","score":0.9994999766349792,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10715","display_name":"Distributed and Parallel Computing Systems","score":0.9988999962806702,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10101","display_name":"Cloud Computing and Resource Management","score":0.9983999729156494,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/checksum","display_name":"Checksum","score":0.7908343076705933},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7866272926330566},{"id":"https://openalex.org/keywords/cholesky-decomposition","display_name":"Cholesky decomposition","score":0.7712889909744263},{"id":"https://openalex.org/keywords/fault-tolerance","display_name":"Fault tolerance","score":0.6751279830932617},{"id":"https://openalex.org/keywords/overhead","display_name":"Overhead (engineering)","score":0.6199865937232971},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.6010114550590515},{"id":"https://openalex.org/keywords/qr-decomposition","display_name":"QR decomposition","score":0.569129467010498},{"id":"https://openalex.org/keywords/lu-decomposition","display_name":"LU decomposition","score":0.5102744102478027},{"id":"https://openalex.org/keywords/supercomputer","display_name":"Supercomputer","score":0.5082561373710632},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.5051854252815247},{"id":"https://openalex.org/keywords/scalability","display_name":"Scalability","score":0.46975210309028625},{"id":"https://openalex.org/keywords/minimum-degree-algorithm","display_name":"Minimum degree algorithm","score":0.4392774999141693},{"id":"https://openalex.org/keywords/matrix","display_name":"Matrix (chemical analysis)","score":0.43148475885391235},{"id":"https://openalex.org/keywords/matrix-decomposition","display_name":"Matrix decomposition","score":0.41798311471939087},{"id":"https://openalex.org/keywords/eigenvalues-and-eigenvectors","display_name":"Eigenvalues and eigenvectors","score":0.3786769509315491},{"id":"https://openalex.org/keywords/incomplete-cholesky-factorization","display_name":"Incomplete Cholesky factorization","score":0.2592884302139282},{"id":"https://openalex.org/keywords/distributed-computing","display_name":"Distributed computing","score":0.22565367817878723}],"concepts":[{"id":"https://openalex.org/C162372511","wikidata":"https://www.wikidata.org/wiki/Q218341","display_name":"Checksum","level":2,"score":0.7908343076705933},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7866272926330566},{"id":"https://openalex.org/C34727166","wikidata":"https://www.wikidata.org/wiki/Q515375","display_name":"Cholesky decomposition","level":3,"score":0.7712889909744263},{"id":"https://openalex.org/C63540848","wikidata":"https://www.wikidata.org/wiki/Q3140932","display_name":"Fault tolerance","level":2,"score":0.6751279830932617},{"id":"https://openalex.org/C2779960059","wikidata":"https://www.wikidata.org/wiki/Q7113681","display_name":"Overhead (engineering)","level":2,"score":0.6199865937232971},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.6010114550590515},{"id":"https://openalex.org/C188060507","wikidata":"https://www.wikidata.org/wiki/Q653242","display_name":"QR decomposition","level":3,"score":0.569129467010498},{"id":"https://openalex.org/C123213974","wikidata":"https://www.wikidata.org/wiki/Q833089","display_name":"LU decomposition","level":4,"score":0.5102744102478027},{"id":"https://openalex.org/C83283714","wikidata":"https://www.wikidata.org/wiki/Q121117","display_name":"Supercomputer","level":2,"score":0.5082561373710632},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.5051854252815247},{"id":"https://openalex.org/C48044578","wikidata":"https://www.wikidata.org/wiki/Q727490","display_name":"Scalability","level":2,"score":0.46975210309028625},{"id":"https://openalex.org/C46085209","wikidata":"https://www.wikidata.org/wiki/Q17098969","display_name":"Minimum degree algorithm","level":5,"score":0.4392774999141693},{"id":"https://openalex.org/C106487976","wikidata":"https://www.wikidata.org/wiki/Q685816","display_name":"Matrix (chemical analysis)","level":2,"score":0.43148475885391235},{"id":"https://openalex.org/C42355184","wikidata":"https://www.wikidata.org/wiki/Q1361088","display_name":"Matrix decomposition","level":3,"score":0.41798311471939087},{"id":"https://openalex.org/C158693339","wikidata":"https://www.wikidata.org/wiki/Q190524","display_name":"Eigenvalues and eigenvectors","level":2,"score":0.3786769509315491},{"id":"https://openalex.org/C44363057","wikidata":"https://www.wikidata.org/wiki/Q6015160","display_name":"Incomplete Cholesky factorization","level":4,"score":0.2592884302139282},{"id":"https://openalex.org/C120314980","wikidata":"https://www.wikidata.org/wiki/Q180634","display_name":"Distributed computing","level":1,"score":0.22565367817878723},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0},{"id":"https://openalex.org/C77088390","wikidata":"https://www.wikidata.org/wiki/Q8513","display_name":"Database","level":1,"score":0.0},{"id":"https://openalex.org/C159985019","wikidata":"https://www.wikidata.org/wiki/Q181790","display_name":"Composite material","level":1,"score":0.0},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C192562407","wikidata":"https://www.wikidata.org/wiki/Q228736","display_name":"Materials science","level":0,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0}],"mesh":[],"locations_count":3,"locations":[{"id":"doi:10.1145/2145816.2145845","is_oa":false,"landing_page_url":"https://doi.org/10.1145/2145816.2145845","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 17th ACM SIGPLAN symposium on Principles and Practice of Parallel Programming","raw_type":"proceedings-article"},{"id":"pmh:oai:CiteSeerX.psu:10.1.1.650.6030","is_oa":false,"landing_page_url":"http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.650.6030","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"http://icl.cs.utk.edu/news_pub/submissions/p225-du.pdf","raw_type":"text"},{"id":"pmh:oai:CiteSeerX.psu:10.1.1.819.1251","is_oa":false,"landing_page_url":"http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.819.1251","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"http://web.eecs.utk.edu/%7Elibrary/TechReports/2011/ut-cs-11-676.pdf","raw_type":"text"}],"best_oa_location":null,"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/16","display_name":"Peace, Justice and strong institutions","score":0.6000000238418579}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":30,"referenced_works":["https://openalex.org/W28511425","https://openalex.org/W94439627","https://openalex.org/W155647330","https://openalex.org/W1509342228","https://openalex.org/W1576341769","https://openalex.org/W1750594686","https://openalex.org/W1862835629","https://openalex.org/W1967324265","https://openalex.org/W1977323337","https://openalex.org/W2001495258","https://openalex.org/W2072072075","https://openalex.org/W2083606889","https://openalex.org/W2083613288","https://openalex.org/W2096504919","https://openalex.org/W2097946733","https://openalex.org/W2102924960","https://openalex.org/W2107143544","https://openalex.org/W2117293168","https://openalex.org/W2119565742","https://openalex.org/W2120631451","https://openalex.org/W2151984682","https://openalex.org/W2158344138","https://openalex.org/W2165009364","https://openalex.org/W2296772319","https://openalex.org/W2990714382","https://openalex.org/W4231150350","https://openalex.org/W4239025233","https://openalex.org/W4285719527","https://openalex.org/W6639352502","https://openalex.org/W6750968397"],"related_works":["https://openalex.org/W2982181895","https://openalex.org/W2039527536","https://openalex.org/W2035803264","https://openalex.org/W3035209767","https://openalex.org/W4306891336","https://openalex.org/W2052455844","https://openalex.org/W2135765421","https://openalex.org/W4289143641","https://openalex.org/W4240431572","https://openalex.org/W2132622200"],"abstract_inverted_index":{"Dense":[0],"matrix":[1,65,169],"factorizations,":[2,170],"such":[3,75],"as":[4,76],"LU,":[5],"Cholesky":[6],"and":[7,23,83,90,141,193,201,214,219],"QR,":[8],"are":[9,30,110],"widely":[10],"used":[11],"for":[12,102],"scientific":[13],"applications":[14],"that":[15,177],"require":[16],"solving":[17],"systems":[18],"of":[19,43,79,86,112,138,167,190,199],"linear":[20,24],"equations,":[21],"eigenvalues":[22],"least":[25],"squares":[26],"problems.":[27],"Such":[28],"computations":[29],"normally":[31],"carried":[32],"out":[33],"on":[34,58,204],"supercomputers,":[35],"whose":[36],"ever-growing":[37],"scale":[38],"induces":[39],"a":[40,53,93,99,129,164],"fast":[41],"decline":[42],"the":[44,77,84,104,108,118,122,144,149,178,185,188,194,205,211],"Mean":[45],"Time":[46],"To":[47],"Failure":[48],"(MTTF).":[49],"This":[50,133],"paper":[51],"proposes":[52],"new":[54],"hybrid":[55,159],"approach,":[56],"based":[57],"Algorithm-Based":[59],"Fault":[60],"Tolerance":[61],"(ABFT),":[62],"to":[63,163],"help":[64],"factorizations":[66],"algorithms":[67,155],"survive":[68],"fail-stop":[69],"failures.":[70],"We":[71,96],"consider":[72],"extreme":[73],"conditions,":[74],"absence":[78],"any":[80],"reliable":[81],"component":[82],"possibility":[85],"loosing":[87],"both":[88],"data":[89],"checksum":[91,145],"from":[92,148,157],"single":[94],"failure.":[95],"will":[97],"present":[98],"generic":[100],"solution":[101,160],"protecting":[103],"right":[105,150],"factor,":[106,120],"where":[107,121],"updates":[109],"applied,":[111,126],"all":[113],"above":[114],"mentioned":[115],"factorizations.":[116],"For":[117],"left":[119],"panel":[123],"has":[124],"been":[125],"we":[127],"propose":[128],"scalable":[130],"checkpointing":[131,139],"algorithm.":[132],"algorithm":[134],"features":[135],"high":[136],"degree":[137],"parallelism":[140],"cooperatively":[142],"utilizes":[143],"storage":[146],"leftover":[147],"factor":[151],"protection.":[152],"The":[153],"fault-tolerant":[154],"derived":[156],"this":[158],"is":[161],"applicable":[162],"wide":[165],"range":[166],"dense":[168],"with":[171,184],"minor":[172],"modifications.":[173],"Theoretical":[174],"analysis":[175],"shows":[176],"fault":[179],"tolerance":[180],"overhead":[181],"sharply":[182],"decreases":[183],"scaling":[186],"in":[187],"number":[189],"computing":[191],"units":[192],"problem":[195],"size.":[196],"Experimental":[197],"results":[198],"LU":[200],"QR":[202],"factorization":[203],"Kraken":[206],"(Cray":[207],"XT5)":[208],"supercomputer":[209],"validate":[210],"theoretical":[212],"evaluation":[213],"confirm":[215],"negligible":[216],"overhead,":[217],"with-":[218],"without-errors.":[220]},"counts_by_year":[{"year":2025,"cited_by_count":1},{"year":2024,"cited_by_count":1},{"year":2023,"cited_by_count":1},{"year":2022,"cited_by_count":1},{"year":2021,"cited_by_count":1},{"year":2020,"cited_by_count":7},{"year":2019,"cited_by_count":8},{"year":2018,"cited_by_count":7},{"year":2017,"cited_by_count":12},{"year":2016,"cited_by_count":12},{"year":2015,"cited_by_count":19},{"year":2014,"cited_by_count":14},{"year":2013,"cited_by_count":16},{"year":2012,"cited_by_count":7}],"updated_date":"2026-04-05T17:49:38.594831","created_date":"2025-10-10T00:00:00"}
