{"id":"https://openalex.org/W2316016107","doi":"https://doi.org/10.1109/tpds.2016.2517633","title":"A Systematic Methodology for Evaluating the Error Resilience of GPGPU Applications","display_name":"A Systematic Methodology for Evaluating the Error Resilience of GPGPU Applications","publication_year":2016,"publication_date":"2016-03-07","ids":{"openalex":"https://openalex.org/W2316016107","doi":"https://doi.org/10.1109/tpds.2016.2517633","mag":"2316016107"},"language":"en","primary_location":{"id":"doi:10.1109/tpds.2016.2517633","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tpds.2016.2517633","pdf_url":null,"source":{"id":"https://openalex.org/S97130795","display_name":"IEEE Transactions on Parallel and Distributed Systems","issn_l":"1045-9219","issn":["1045-9219","1558-2183","2161-9883"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Parallel and Distributed Systems","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5101558674","display_name":"Bo Fang","orcid":"https://orcid.org/0000-0001-7694-5527"},"institutions":[{"id":"https://openalex.org/I141945490","display_name":"University of British Columbia","ror":"https://ror.org/03rmrcq20","country_code":"CA","type":"education","lineage":["https://openalex.org/I141945490"]}],"countries":["CA"],"is_corresponding":true,"raw_author_name":"Bo Fang","raw_affiliation_strings":["Department of Electrical and Computer Engineering, University of British Columbia"],"affiliations":[{"raw_affiliation_string":"Department of Electrical and Computer Engineering, University of British Columbia","institution_ids":["https://openalex.org/I141945490"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5073641368","display_name":"Karthik Pattabiraman","orcid":"https://orcid.org/0000-0003-2380-3415"},"institutions":[{"id":"https://openalex.org/I141945490","display_name":"University of British Columbia","ror":"https://ror.org/03rmrcq20","country_code":"CA","type":"education","lineage":["https://openalex.org/I141945490"]}],"countries":["CA"],"is_corresponding":false,"raw_author_name":"Karthik Pattabiraman","raw_affiliation_strings":["Department of Electrical and Computer Engineering, University of British Columbia"],"affiliations":[{"raw_affiliation_string":"Department of Electrical and Computer Engineering, University of British Columbia","institution_ids":["https://openalex.org/I141945490"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5088755273","display_name":"Matei Ripeanu","orcid":"https://orcid.org/0000-0001-9839-3866"},"institutions":[{"id":"https://openalex.org/I141945490","display_name":"University of British Columbia","ror":"https://ror.org/03rmrcq20","country_code":"CA","type":"education","lineage":["https://openalex.org/I141945490"]}],"countries":["CA"],"is_corresponding":false,"raw_author_name":"Matei Ripeanu","raw_affiliation_strings":["Department of Electrical and Computer Engineering, University of British Columbia"],"affiliations":[{"raw_affiliation_string":"Department of Electrical and Computer Engineering, University of British Columbia","institution_ids":["https://openalex.org/I141945490"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5078748445","display_name":"Sudhanva Gurumurthi","orcid":"https://orcid.org/0000-0002-1740-7304"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Sudhanva Gurumurthi","raw_affiliation_strings":["Cloud Innovation Lab, IBM Corporation"],"affiliations":[{"raw_affiliation_string":"Cloud Innovation Lab, IBM Corporation","institution_ids":[]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5101558674"],"corresponding_institution_ids":["https://openalex.org/I141945490"],"apc_list":null,"apc_paid":null,"fwci":1.1026,"has_fulltext":false,"cited_by_count":20,"citation_normalized_percentile":{"value":0.79577897,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":98},"biblio":{"volume":"27","issue":"12","first_page":"3397","last_page":"3411"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11005","display_name":"Radiation Effects in Electronics","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/2208","display_name":"Electrical and Electronic Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11005","display_name":"Radiation Effects in Electronics","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/2208","display_name":"Electrical and Electronic Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.9977999925613403,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10772","display_name":"Distributed systems and fault tolerance","score":0.9976999759674072,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/fault-injection","display_name":"Fault injection","score":0.8641244769096375},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8431482315063477},{"id":"https://openalex.org/keywords/resilience","display_name":"Resilience (materials science)","score":0.7322344779968262},{"id":"https://openalex.org/keywords/general-purpose-computing-on-graphics-processing-units","display_name":"General-purpose computing on graphics processing units","score":0.6782975792884827},{"id":"https://openalex.org/keywords/reliability","display_name":"Reliability (semiconductor)","score":0.5878864526748657},{"id":"https://openalex.org/keywords/fault-tolerance","display_name":"Fault tolerance","score":0.5378661751747131},{"id":"https://openalex.org/keywords/representativeness-heuristic","display_name":"Representativeness heuristic","score":0.5222626328468323},{"id":"https://openalex.org/keywords/graphics","display_name":"Graphics","score":0.47919759154319763},{"id":"https://openalex.org/keywords/graphics-processing-unit","display_name":"Graphics processing unit","score":0.43586355447769165},{"id":"https://openalex.org/keywords/embedded-system","display_name":"Embedded system","score":0.41117945313453674},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.3989037275314331},{"id":"https://openalex.org/keywords/computer-engineering","display_name":"Computer engineering","score":0.38888317346572876},{"id":"https://openalex.org/keywords/reliability-engineering","display_name":"Reliability engineering","score":0.37921562790870667},{"id":"https://openalex.org/keywords/distributed-computing","display_name":"Distributed computing","score":0.33468756079673767},{"id":"https://openalex.org/keywords/computer-architecture","display_name":"Computer architecture","score":0.32247433066368103},{"id":"https://openalex.org/keywords/software","display_name":"Software","score":0.13481023907661438},{"id":"https://openalex.org/keywords/operating-system","display_name":"Operating system","score":0.109209805727005}],"concepts":[{"id":"https://openalex.org/C2775928411","wikidata":"https://www.wikidata.org/wiki/Q2041312","display_name":"Fault injection","level":3,"score":0.8641244769096375},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8431482315063477},{"id":"https://openalex.org/C2779585090","wikidata":"https://www.wikidata.org/wiki/Q3457762","display_name":"Resilience (materials science)","level":2,"score":0.7322344779968262},{"id":"https://openalex.org/C50630238","wikidata":"https://www.wikidata.org/wiki/Q971505","display_name":"General-purpose computing on graphics processing units","level":3,"score":0.6782975792884827},{"id":"https://openalex.org/C43214815","wikidata":"https://www.wikidata.org/wiki/Q7310987","display_name":"Reliability (semiconductor)","level":3,"score":0.5878864526748657},{"id":"https://openalex.org/C63540848","wikidata":"https://www.wikidata.org/wiki/Q3140932","display_name":"Fault tolerance","level":2,"score":0.5378661751747131},{"id":"https://openalex.org/C37381756","wikidata":"https://www.wikidata.org/wiki/Q20203288","display_name":"Representativeness heuristic","level":2,"score":0.5222626328468323},{"id":"https://openalex.org/C21442007","wikidata":"https://www.wikidata.org/wiki/Q1027879","display_name":"Graphics","level":2,"score":0.47919759154319763},{"id":"https://openalex.org/C2779851693","wikidata":"https://www.wikidata.org/wiki/Q183484","display_name":"Graphics processing unit","level":2,"score":0.43586355447769165},{"id":"https://openalex.org/C149635348","wikidata":"https://www.wikidata.org/wiki/Q193040","display_name":"Embedded system","level":1,"score":0.41117945313453674},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.3989037275314331},{"id":"https://openalex.org/C113775141","wikidata":"https://www.wikidata.org/wiki/Q428691","display_name":"Computer engineering","level":1,"score":0.38888317346572876},{"id":"https://openalex.org/C200601418","wikidata":"https://www.wikidata.org/wiki/Q2193887","display_name":"Reliability engineering","level":1,"score":0.37921562790870667},{"id":"https://openalex.org/C120314980","wikidata":"https://www.wikidata.org/wiki/Q180634","display_name":"Distributed computing","level":1,"score":0.33468756079673767},{"id":"https://openalex.org/C118524514","wikidata":"https://www.wikidata.org/wiki/Q173212","display_name":"Computer architecture","level":1,"score":0.32247433066368103},{"id":"https://openalex.org/C2777904410","wikidata":"https://www.wikidata.org/wiki/Q7397","display_name":"Software","level":2,"score":0.13481023907661438},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.109209805727005},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.0},{"id":"https://openalex.org/C15744967","wikidata":"https://www.wikidata.org/wiki/Q9418","display_name":"Psychology","level":0,"score":0.0},{"id":"https://openalex.org/C163258240","wikidata":"https://www.wikidata.org/wiki/Q25342","display_name":"Power (physics)","level":2,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C77805123","wikidata":"https://www.wikidata.org/wiki/Q161272","display_name":"Social psychology","level":1,"score":0.0},{"id":"https://openalex.org/C97355855","wikidata":"https://www.wikidata.org/wiki/Q11473","display_name":"Thermodynamics","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/tpds.2016.2517633","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tpds.2016.2517633","pdf_url":null,"source":{"id":"https://openalex.org/S97130795","display_name":"IEEE Transactions on Parallel and Distributed Systems","issn_l":"1045-9219","issn":["1045-9219","1558-2183","2161-9883"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Parallel and Distributed Systems","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[{"id":"https://openalex.org/F4320309480","display_name":"Nvidia","ror":"https://ror.org/03jdj4y14"},{"id":"https://openalex.org/F4320334593","display_name":"Natural Sciences and Engineering Research Council of Canada","ror":"https://ror.org/01h531d29"}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":49,"referenced_works":["https://openalex.org/W1838389500","https://openalex.org/W1978082708","https://openalex.org/W1979527452","https://openalex.org/W1988070283","https://openalex.org/W2008185810","https://openalex.org/W2010043262","https://openalex.org/W2013280342","https://openalex.org/W2017521824","https://openalex.org/W2031533321","https://openalex.org/W2042203635","https://openalex.org/W2047171058","https://openalex.org/W2051296307","https://openalex.org/W2052887509","https://openalex.org/W2067270725","https://openalex.org/W2080592089","https://openalex.org/W2109473404","https://openalex.org/W2118558147","https://openalex.org/W2121082877","https://openalex.org/W2123475473","https://openalex.org/W2123907700","https://openalex.org/W2125169487","https://openalex.org/W2132362854","https://openalex.org/W2139375423","https://openalex.org/W2145071552","https://openalex.org/W2147732182","https://openalex.org/W2148602057","https://openalex.org/W2150267144","https://openalex.org/W2152365194","https://openalex.org/W2155503253","https://openalex.org/W2155887629","https://openalex.org/W2156519507","https://openalex.org/W2161061943","https://openalex.org/W2162546540","https://openalex.org/W2273440736","https://openalex.org/W2536495848","https://openalex.org/W2541274825","https://openalex.org/W2598720231","https://openalex.org/W4231474137","https://openalex.org/W4233556486","https://openalex.org/W4241417426","https://openalex.org/W4255519882","https://openalex.org/W6661186670","https://openalex.org/W6677993110","https://openalex.org/W6678286823","https://openalex.org/W6681612278","https://openalex.org/W6682722398","https://openalex.org/W6683923952","https://openalex.org/W6694513646","https://openalex.org/W6735844073"],"related_works":["https://openalex.org/W2151046618","https://openalex.org/W1972148443","https://openalex.org/W1969233021","https://openalex.org/W2167646277","https://openalex.org/W2063573318","https://openalex.org/W2388314963","https://openalex.org/W3158047141","https://openalex.org/W2027716782","https://openalex.org/W1656096860","https://openalex.org/W2027443981"],"abstract_inverted_index":{"The":[0],"wide":[1],"adoption":[2],"of":[3,18,34,80,111,122,137],"graphics":[4],"processing":[5],"units":[6],"(GPUs)":[7],"as":[8],"accelerators":[9],"for":[10,41],"general-purpose":[11,42],"applications":[12,44,138],"makes":[13,53,64],"the":[14,32,76,106,109,118,134],"end-to-end":[15,77],"reliability":[16,78],"implications":[17],"their":[19,49,140],"use":[20],"increasingly":[21],"significant.":[22],"Fault":[23],"injection":[24],"is":[25,45],"a":[26,38,71,90,100,112],"widely":[27],"adopted":[28],"method":[29],"to":[30,48,56,74],"evaluate":[31,75],"resilience":[33,120],"applications.":[35],"However,":[36],"building":[37],"fault":[39],"injector":[40],"GPU":[43,96],"challenging":[46],"due":[47],"massive":[50],"parallelism,":[51],"which":[52],"it":[54,69,87,116,127],"difficult":[55],"achieve":[57],"representativeness":[58,107],"while":[59],"being":[60],"time-efficient.":[61],"This":[62],"paper":[63],"four":[65],"key":[66],"contributions.":[67],"First,":[68],"presents":[70],"fault-injection":[72,91,113],"methodology":[73],"properties":[79,136],"application":[81,124],"kernels":[82],"running":[83],"on":[84,131],"GPUs.":[85],"Second,":[86],"introduces":[88],"GPU-Qin,":[89],"tool":[92],"that":[93],"uses":[94],"real":[95],"hardware":[97],"and":[98,102,108,139],"offers":[99],"tunable":[101],"efficient":[103],"balance":[104],"between":[105,133],"cost":[110],"campaign.":[114],"Third,":[115],"characterizes":[117],"error":[119,141],"characteristics":[121],"seventeen":[123],"kernels.":[125],"Finally,":[126],"provides":[128],"preliminary":[129],"insights":[130],"correlations":[132],"algorithmic":[135],"resilience.":[142]},"counts_by_year":[{"year":2023,"cited_by_count":1},{"year":2022,"cited_by_count":6},{"year":2021,"cited_by_count":4},{"year":2020,"cited_by_count":3},{"year":2019,"cited_by_count":4},{"year":2018,"cited_by_count":2}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
