{"id":"https://openalex.org/W1977646937","doi":"https://doi.org/10.1109/hpec.2014.7040999","title":"An evaluation of lazy fault detection based on Adaptive Redundant Multithreading","display_name":"An evaluation of lazy fault detection based on Adaptive Redundant Multithreading","publication_year":2014,"publication_date":"2014-09-01","ids":{"openalex":"https://openalex.org/W1977646937","doi":"https://doi.org/10.1109/hpec.2014.7040999","mag":"1977646937"},"language":"en","primary_location":{"id":"doi:10.1109/hpec.2014.7040999","is_oa":false,"landing_page_url":"https://doi.org/10.1109/hpec.2014.7040999","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2014 IEEE High Performance Extreme Computing Conference (HPEC)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5065625558","display_name":"Saurabh Hukerikar","orcid":"https://orcid.org/0000-0002-2612-2001"},"institutions":[{"id":"https://openalex.org/I1174212","display_name":"University of Southern California","ror":"https://ror.org/03taz7m60","country_code":"US","type":"education","lineage":["https://openalex.org/I1174212"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Saurabh Hukerikar","raw_affiliation_strings":["Information Sciences Institute University of Southern California, Marina del Rey, CA, USA","Information Sciences Institute, University of Southern California, Marina Del Rey, USA"],"affiliations":[{"raw_affiliation_string":"Information Sciences Institute University of Southern California, Marina del Rey, CA, USA","institution_ids":["https://openalex.org/I1174212"]},{"raw_affiliation_string":"Information Sciences Institute, University of Southern California, Marina Del Rey, USA","institution_ids":["https://openalex.org/I1174212"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5019745966","display_name":"Keita Teranishi","orcid":"https://orcid.org/0000-0001-6647-2690"},"institutions":[{"id":"https://openalex.org/I192454743","display_name":"Sandia National Laboratories California","ror":"https://ror.org/058m7ey48","country_code":"US","type":"facility","lineage":["https://openalex.org/I1330989302","https://openalex.org/I1330989302","https://openalex.org/I192454743","https://openalex.org/I198811213","https://openalex.org/I198811213","https://openalex.org/I4210104735"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Keita Teranishi","raw_affiliation_strings":["Sandia National Laboratories, Livermore, CA, USA","Sandia National Labs Livermore CA, USA"],"affiliations":[{"raw_affiliation_string":"Sandia National Laboratories, Livermore, CA, USA","institution_ids":["https://openalex.org/I192454743"]},{"raw_affiliation_string":"Sandia National Labs Livermore CA, USA","institution_ids":["https://openalex.org/I192454743"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5042540194","display_name":"Pedro C. Diniz","orcid":"https://orcid.org/0000-0003-3131-9367"},"institutions":[{"id":"https://openalex.org/I1174212","display_name":"University of Southern California","ror":"https://ror.org/03taz7m60","country_code":"US","type":"education","lineage":["https://openalex.org/I1174212"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Pedro C. Diniz","raw_affiliation_strings":["Information Sciences Institute University of Southern California, Marina del Rey, CA, USA","Information Sciences Institute, University of Southern California, Marina Del Rey, USA"],"affiliations":[{"raw_affiliation_string":"Information Sciences Institute University of Southern California, Marina del Rey, CA, USA","institution_ids":["https://openalex.org/I1174212"]},{"raw_affiliation_string":"Information Sciences Institute, University of Southern California, Marina Del Rey, USA","institution_ids":["https://openalex.org/I1174212"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5025422213","display_name":"Robert F. Lucas","orcid":"https://orcid.org/0000-0003-1303-2012"},"institutions":[{"id":"https://openalex.org/I1174212","display_name":"University of Southern California","ror":"https://ror.org/03taz7m60","country_code":"US","type":"education","lineage":["https://openalex.org/I1174212"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Robert F. Lucas","raw_affiliation_strings":["Information Sciences Institute University of Southern California, Marina del Rey, CA, USA","Information Sciences Institute, University of Southern California, Marina Del Rey, USA"],"affiliations":[{"raw_affiliation_string":"Information Sciences Institute University of Southern California, Marina del Rey, CA, USA","institution_ids":["https://openalex.org/I1174212"]},{"raw_affiliation_string":"Information Sciences Institute, University of Southern California, Marina Del Rey, USA","institution_ids":["https://openalex.org/I1174212"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5065625558"],"corresponding_institution_ids":["https://openalex.org/I1174212"],"apc_list":null,"apc_paid":null,"fwci":2.7585,"has_fulltext":false,"cited_by_count":14,"citation_normalized_percentile":{"value":0.90877072,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":94,"max":98},"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"6"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10772","display_name":"Distributed systems and fault tolerance","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10772","display_name":"Distributed systems and fault tolerance","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11005","display_name":"Radiation Effects in Electronics","score":0.9994000196456909,"subfield":{"id":"https://openalex.org/subfields/2208","display_name":"Electrical and Electronic Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/multithreading","display_name":"Multithreading","score":0.8942957520484924},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8530677556991577},{"id":"https://openalex.org/keywords/fault-detection-and-isolation","display_name":"Fault detection and isolation","score":0.6604912877082825},{"id":"https://openalex.org/keywords/fault-tolerance","display_name":"Fault tolerance","score":0.6373165249824524},{"id":"https://openalex.org/keywords/redundancy","display_name":"Redundancy (engineering)","score":0.5814138054847717},{"id":"https://openalex.org/keywords/thread","display_name":"Thread (computing)","score":0.5787874460220337},{"id":"https://openalex.org/keywords/programmer","display_name":"Programmer","score":0.5726478695869446},{"id":"https://openalex.org/keywords/distributed-computing","display_name":"Distributed computing","score":0.5551309585571289},{"id":"https://openalex.org/keywords/simultaneous-multithreading","display_name":"Simultaneous multithreading","score":0.5022182464599609},{"id":"https://openalex.org/keywords/computation","display_name":"Computation","score":0.45348283648490906},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.4398176074028015},{"id":"https://openalex.org/keywords/yarn","display_name":"Yarn","score":0.43257877230644226},{"id":"https://openalex.org/keywords/robustness","display_name":"Robustness (evolution)","score":0.4210693836212158},{"id":"https://openalex.org/keywords/embedded-system","display_name":"Embedded system","score":0.3394002914428711},{"id":"https://openalex.org/keywords/computer-engineering","display_name":"Computer engineering","score":0.3363876938819885},{"id":"https://openalex.org/keywords/operating-system","display_name":"Operating system","score":0.1775030493736267},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.12834912538528442}],"concepts":[{"id":"https://openalex.org/C201410400","wikidata":"https://www.wikidata.org/wiki/Q1064412","display_name":"Multithreading","level":3,"score":0.8942957520484924},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8530677556991577},{"id":"https://openalex.org/C152745839","wikidata":"https://www.wikidata.org/wiki/Q5438153","display_name":"Fault detection and isolation","level":3,"score":0.6604912877082825},{"id":"https://openalex.org/C63540848","wikidata":"https://www.wikidata.org/wiki/Q3140932","display_name":"Fault tolerance","level":2,"score":0.6373165249824524},{"id":"https://openalex.org/C152124472","wikidata":"https://www.wikidata.org/wiki/Q1204361","display_name":"Redundancy (engineering)","level":2,"score":0.5814138054847717},{"id":"https://openalex.org/C138101251","wikidata":"https://www.wikidata.org/wiki/Q213092","display_name":"Thread (computing)","level":2,"score":0.5787874460220337},{"id":"https://openalex.org/C2778514511","wikidata":"https://www.wikidata.org/wiki/Q1374194","display_name":"Programmer","level":2,"score":0.5726478695869446},{"id":"https://openalex.org/C120314980","wikidata":"https://www.wikidata.org/wiki/Q180634","display_name":"Distributed computing","level":1,"score":0.5551309585571289},{"id":"https://openalex.org/C85717602","wikidata":"https://www.wikidata.org/wiki/Q82178","display_name":"Simultaneous multithreading","level":4,"score":0.5022182464599609},{"id":"https://openalex.org/C45374587","wikidata":"https://www.wikidata.org/wiki/Q12525525","display_name":"Computation","level":2,"score":0.45348283648490906},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.4398176074028015},{"id":"https://openalex.org/C2778787235","wikidata":"https://www.wikidata.org/wiki/Q49007","display_name":"Yarn","level":2,"score":0.43257877230644226},{"id":"https://openalex.org/C63479239","wikidata":"https://www.wikidata.org/wiki/Q7353546","display_name":"Robustness (evolution)","level":3,"score":0.4210693836212158},{"id":"https://openalex.org/C149635348","wikidata":"https://www.wikidata.org/wiki/Q193040","display_name":"Embedded system","level":1,"score":0.3394002914428711},{"id":"https://openalex.org/C113775141","wikidata":"https://www.wikidata.org/wiki/Q428691","display_name":"Computer engineering","level":1,"score":0.3363876938819885},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.1775030493736267},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.12834912538528442},{"id":"https://openalex.org/C192562407","wikidata":"https://www.wikidata.org/wiki/Q228736","display_name":"Materials science","level":0,"score":0.0},{"id":"https://openalex.org/C55493867","wikidata":"https://www.wikidata.org/wiki/Q7094","display_name":"Biochemistry","level":1,"score":0.0},{"id":"https://openalex.org/C159985019","wikidata":"https://www.wikidata.org/wiki/Q181790","display_name":"Composite material","level":1,"score":0.0},{"id":"https://openalex.org/C104317684","wikidata":"https://www.wikidata.org/wiki/Q7187","display_name":"Gene","level":2,"score":0.0},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.0},{"id":"https://openalex.org/C185592680","wikidata":"https://www.wikidata.org/wiki/Q2329","display_name":"Chemistry","level":0,"score":0.0},{"id":"https://openalex.org/C172707124","wikidata":"https://www.wikidata.org/wiki/Q423488","display_name":"Actuator","level":2,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/hpec.2014.7040999","is_oa":false,"landing_page_url":"https://doi.org/10.1109/hpec.2014.7040999","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2014 IEEE High Performance Extreme Computing Conference (HPEC)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[{"id":"https://openalex.org/F4320306084","display_name":"U.S. Department of Energy","ror":"https://ror.org/01bj3aw27"},{"id":"https://openalex.org/F4320332369","display_name":"National Nuclear Security Administration","ror":"https://ror.org/03sk1we31"},{"id":"https://openalex.org/F4320337367","display_name":"Division of Materials Research","ror":"https://ror.org/01pc7k308"},{"id":"https://openalex.org/F4320337506","display_name":"Advanced Scientific Computing Research","ror":"https://ror.org/0012c7r22"},{"id":"https://openalex.org/F4320338281","display_name":"Army Research Office","ror":"https://ror.org/05epdh915"},{"id":"https://openalex.org/F4320338291","display_name":"Sandia National Laboratories","ror":"https://ror.org/01apwpt12"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":25,"referenced_works":["https://openalex.org/W212533765","https://openalex.org/W1993660990","https://openalex.org/W1997126580","https://openalex.org/W2012179240","https://openalex.org/W2034593585","https://openalex.org/W2036641664","https://openalex.org/W2038238534","https://openalex.org/W2061729790","https://openalex.org/W2068790862","https://openalex.org/W2072072075","https://openalex.org/W2083613288","https://openalex.org/W2102480715","https://openalex.org/W2105524676","https://openalex.org/W2112360707","https://openalex.org/W2116015411","https://openalex.org/W2116059696","https://openalex.org/W2118629573","https://openalex.org/W2130189691","https://openalex.org/W2145064068","https://openalex.org/W2952351386","https://openalex.org/W4243863555","https://openalex.org/W4248707645","https://openalex.org/W4253094798","https://openalex.org/W6675837356","https://openalex.org/W6685186519"],"related_works":["https://openalex.org/W2118532220","https://openalex.org/W2115561485","https://openalex.org/W1985089255","https://openalex.org/W2153202644","https://openalex.org/W2010970156","https://openalex.org/W2380961080","https://openalex.org/W4235861380","https://openalex.org/W2106625514","https://openalex.org/W2726029565","https://openalex.org/W1867214769"],"abstract_inverted_index":{"The":[0,105,140],"challenge":[1],"of":[2,22,34,48,57,155,201],"resilience":[3],"for":[4,11,198],"High":[5],"Performance":[6],"Computing":[7],"applications":[8],"is":[9,112,126,187],"significant":[10],"future":[12,63],"extreme":[13],"scale":[14],"systems.":[15],"These":[16],"systems":[17,66],"will":[18,28,67],"experience":[19],"unprecedented":[20],"rates":[21],"faults":[23],"and":[24,54,177,189],"errors":[25],"as":[26],"they":[27],"be":[29],"constructed":[30],"from":[31],"massive":[32],"numbers":[33],"components":[35],"that":[36,81,172,183,212],"are":[37],"inherently":[38],"less":[39],"reliable":[40],"than":[41],"those":[42],"available":[43],"today.":[44],"While":[45],"the":[46,109,123,153,159,164,170,173,216,219],"use":[47,61,154],"redundant":[49,89,124,174,220],"computing":[50],"can":[51],"provide":[52,130],"detection":[53,86,132,143,186],"possible":[55],"correction":[56],"errors,":[58],"its":[59],"system-wide":[60],"in":[62,108,146],"extreme-scale":[64],"HPC":[65],"incur":[68],"considerable":[69],"overheads":[70,136],"to":[71,129,137,150,193,214],"application":[72,83,138],"performance.":[73,139],"In":[74,91,117],"this":[75,147],"paper,":[76],"we":[77,94],"present":[78],"a":[79,101,120,199,208],"framework":[80],"provides":[82],"level":[84,103],"fault":[85,131,142,165],"based":[87,99],"on":[88,100],"multithreading.":[90],"previous":[92],"work,":[93],"demonstrated":[95],"an":[96],"adaptive":[97,196],"approach":[98,144,168],"language":[102],"directive.":[104],"computation":[106,162],"contained":[107],"programmer":[110],"directive":[111],"executed":[113],"by":[114,157],"duplicate":[115],"threads.":[116,221],"concert":[118],"with":[119],"runtime":[121],"system,":[122],"multithreading":[125],"enabled":[127],"opportunistically":[128],"at":[133],"more":[134],"reasonable":[135],"lazy":[141,184],"presented":[145],"work":[148],"seeks":[149,213],"further":[151],"optimize":[152],"redundancy":[156],"prioritizing":[158],"application's":[160],"primary":[161],"over":[163,195],"detection.":[166],"Our":[167],"relaxes":[169],"requirement":[171],"threads":[175],"synchronize":[176],"compare":[178],"results":[179],"immediately.":[180],"We":[181,205],"show":[182],"error":[185],"feasible":[188],"yields":[190],"lower":[191],"time":[192],"solution":[194],"RMT":[197],"range":[200],"scientific":[202],"computational":[203],"kernels.":[204],"also":[206],"explore":[207],"thread-to-core":[209],"assignment":[210],"strategy":[211],"reduce":[215],"interference":[217],"between":[218]},"counts_by_year":[{"year":2021,"cited_by_count":3},{"year":2019,"cited_by_count":3},{"year":2017,"cited_by_count":2},{"year":2016,"cited_by_count":4},{"year":2015,"cited_by_count":2}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
