{"id":"https://openalex.org/W2967835621","doi":"https://doi.org/10.1109/smacd.2019.8795243","title":"Long-Term Reliability Management For Multitasking GPGPUs","display_name":"Long-Term Reliability Management For Multitasking GPGPUs","publication_year":2019,"publication_date":"2019-07-01","ids":{"openalex":"https://openalex.org/W2967835621","doi":"https://doi.org/10.1109/smacd.2019.8795243","mag":"2967835621"},"language":"en","primary_location":{"id":"doi:10.1109/smacd.2019.8795243","is_oa":false,"landing_page_url":"https://doi.org/10.1109/smacd.2019.8795243","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2019 16th International Conference on Synthesis, Modeling, Analysis and Simulation Methods and Applications to Circuit Design (SMACD)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5027081374","display_name":"Zeyu Sun","orcid":"https://orcid.org/0000-0001-7465-1824"},"institutions":[{"id":"https://openalex.org/I103635307","display_name":"University of California, Riverside","ror":"https://ror.org/03nawhv43","country_code":"US","type":"education","lineage":["https://openalex.org/I103635307"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Zeyu Sun","raw_affiliation_strings":["Department of Electrical and Computer Engineering, University of California, Riverside"],"affiliations":[{"raw_affiliation_string":"Department of Electrical and Computer Engineering, University of California, Riverside","institution_ids":["https://openalex.org/I103635307"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100412370","display_name":"Taeyoung Kim","orcid":"https://orcid.org/0000-0002-8353-1776"},"institutions":[{"id":"https://openalex.org/I103635307","display_name":"University of California, Riverside","ror":"https://ror.org/03nawhv43","country_code":"US","type":"education","lineage":["https://openalex.org/I103635307"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Taeyoung Kim","raw_affiliation_strings":["Department of Computer Science and Engineering, University of California, Riverside"],"affiliations":[{"raw_affiliation_string":"Department of Computer Science and Engineering, University of California, Riverside","institution_ids":["https://openalex.org/I103635307"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5011003147","display_name":"Marcus Chow","orcid":"https://orcid.org/0000-0002-2577-8914"},"institutions":[{"id":"https://openalex.org/I103635307","display_name":"University of California, Riverside","ror":"https://ror.org/03nawhv43","country_code":"US","type":"education","lineage":["https://openalex.org/I103635307"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Marcus Chow","raw_affiliation_strings":["Department of Computer Science and Engineering, University of California, Riverside"],"affiliations":[{"raw_affiliation_string":"Department of Computer Science and Engineering, University of California, Riverside","institution_ids":["https://openalex.org/I103635307"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5040952356","display_name":"Shaoyi Peng","orcid":"https://orcid.org/0000-0001-9963-1504"},"institutions":[{"id":"https://openalex.org/I103635307","display_name":"University of California, Riverside","ror":"https://ror.org/03nawhv43","country_code":"US","type":"education","lineage":["https://openalex.org/I103635307"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Shaoyi Peng","raw_affiliation_strings":["Department of Electrical and Computer Engineering, University of California, Riverside"],"affiliations":[{"raw_affiliation_string":"Department of Electrical and Computer Engineering, University of California, Riverside","institution_ids":["https://openalex.org/I103635307"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5084377832","display_name":"Han Zhou","orcid":"https://orcid.org/0000-0002-9097-4567"},"institutions":[{"id":"https://openalex.org/I103635307","display_name":"University of California, Riverside","ror":"https://ror.org/03nawhv43","country_code":"US","type":"education","lineage":["https://openalex.org/I103635307"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Han Zhou","raw_affiliation_strings":["Department of Electrical and Computer Engineering, University of California, Riverside"],"affiliations":[{"raw_affiliation_string":"Department of Electrical and Computer Engineering, University of California, Riverside","institution_ids":["https://openalex.org/I103635307"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5081504885","display_name":"Hyoseung Kim","orcid":"https://orcid.org/0000-0002-8553-732X"},"institutions":[{"id":"https://openalex.org/I103635307","display_name":"University of California, Riverside","ror":"https://ror.org/03nawhv43","country_code":"US","type":"education","lineage":["https://openalex.org/I103635307"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Hyoseung Kim","raw_affiliation_strings":["Department of Electrical and Computer Engineering, University of California, Riverside"],"affiliations":[{"raw_affiliation_string":"Department of Electrical and Computer Engineering, University of California, Riverside","institution_ids":["https://openalex.org/I103635307"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5000712719","display_name":"Daniel Wong","orcid":"https://orcid.org/0000-0002-5376-7868"},"institutions":[{"id":"https://openalex.org/I103635307","display_name":"University of California, Riverside","ror":"https://ror.org/03nawhv43","country_code":"US","type":"education","lineage":["https://openalex.org/I103635307"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Daniel Wong","raw_affiliation_strings":["Department of Electrical and Computer Engineering, University of California, Riverside"],"affiliations":[{"raw_affiliation_string":"Department of Electrical and Computer Engineering, University of California, Riverside","institution_ids":["https://openalex.org/I103635307"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5058844682","display_name":"Sheldon X.-D. Tan","orcid":"https://orcid.org/0000-0003-2119-6869"},"institutions":[{"id":"https://openalex.org/I103635307","display_name":"University of California, Riverside","ror":"https://ror.org/03nawhv43","country_code":"US","type":"education","lineage":["https://openalex.org/I103635307"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Sheldon X.-D. Tan","raw_affiliation_strings":["Department of Electrical and Computer Engineering, University of California, Riverside"],"affiliations":[{"raw_affiliation_string":"Department of Electrical and Computer Engineering, University of California, Riverside","institution_ids":["https://openalex.org/I103635307"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":8,"corresponding_author_ids":["https://openalex.org/A5027081374"],"corresponding_institution_ids":["https://openalex.org/I103635307"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.10984943,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"213","last_page":"216"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10829","display_name":"Interconnection Networks and Systems","score":0.998199999332428,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10829","display_name":"Interconnection Networks and Systems","score":0.998199999332428,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10472","display_name":"Semiconductor materials and devices","score":0.9980000257492065,"subfield":{"id":"https://openalex.org/subfields/2208","display_name":"Electrical and Electronic Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10083","display_name":"Graphene research and applications","score":0.9962999820709229,"subfield":{"id":"https://openalex.org/subfields/2505","display_name":"Materials Chemistry"},"field":{"id":"https://openalex.org/fields/25","display_name":"Materials Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/human-multitasking","display_name":"Human multitasking","score":0.8533332347869873},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8354067206382751},{"id":"https://openalex.org/keywords/thread","display_name":"Thread (computing)","score":0.570012092590332},{"id":"https://openalex.org/keywords/scheduling","display_name":"Scheduling (production processes)","score":0.5075324773788452},{"id":"https://openalex.org/keywords/reliability","display_name":"Reliability (semiconductor)","score":0.47188130021095276},{"id":"https://openalex.org/keywords/multi-core-processor","display_name":"Multi-core processor","score":0.4673630893230438},{"id":"https://openalex.org/keywords/general-purpose-computing-on-graphics-processing-units","display_name":"General-purpose computing on graphics processing units","score":0.4528428614139557},{"id":"https://openalex.org/keywords/graphics-processing-unit","display_name":"Graphics processing unit","score":0.4471096992492676},{"id":"https://openalex.org/keywords/reliability-block-diagram","display_name":"Reliability block diagram","score":0.43985307216644287},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.42280322313308716},{"id":"https://openalex.org/keywords/granularity","display_name":"Granularity","score":0.42156344652175903},{"id":"https://openalex.org/keywords/distributed-computing","display_name":"Distributed computing","score":0.395590603351593},{"id":"https://openalex.org/keywords/reliability-engineering","display_name":"Reliability engineering","score":0.23092129826545715},{"id":"https://openalex.org/keywords/graphics","display_name":"Graphics","score":0.20114687085151672},{"id":"https://openalex.org/keywords/operating-system","display_name":"Operating system","score":0.16693121194839478},{"id":"https://openalex.org/keywords/power","display_name":"Power (physics)","score":0.13442721962928772}],"concepts":[{"id":"https://openalex.org/C107418235","wikidata":"https://www.wikidata.org/wiki/Q1520565","display_name":"Human multitasking","level":2,"score":0.8533332347869873},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8354067206382751},{"id":"https://openalex.org/C138101251","wikidata":"https://www.wikidata.org/wiki/Q213092","display_name":"Thread (computing)","level":2,"score":0.570012092590332},{"id":"https://openalex.org/C206729178","wikidata":"https://www.wikidata.org/wiki/Q2271896","display_name":"Scheduling (production processes)","level":2,"score":0.5075324773788452},{"id":"https://openalex.org/C43214815","wikidata":"https://www.wikidata.org/wiki/Q7310987","display_name":"Reliability (semiconductor)","level":3,"score":0.47188130021095276},{"id":"https://openalex.org/C78766204","wikidata":"https://www.wikidata.org/wiki/Q555032","display_name":"Multi-core processor","level":2,"score":0.4673630893230438},{"id":"https://openalex.org/C50630238","wikidata":"https://www.wikidata.org/wiki/Q971505","display_name":"General-purpose computing on graphics processing units","level":3,"score":0.4528428614139557},{"id":"https://openalex.org/C2779851693","wikidata":"https://www.wikidata.org/wiki/Q183484","display_name":"Graphics processing unit","level":2,"score":0.4471096992492676},{"id":"https://openalex.org/C18074226","wikidata":"https://www.wikidata.org/wiki/Q7310986","display_name":"Reliability block diagram","level":3,"score":0.43985307216644287},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.42280322313308716},{"id":"https://openalex.org/C177774035","wikidata":"https://www.wikidata.org/wiki/Q1246948","display_name":"Granularity","level":2,"score":0.42156344652175903},{"id":"https://openalex.org/C120314980","wikidata":"https://www.wikidata.org/wiki/Q180634","display_name":"Distributed computing","level":1,"score":0.395590603351593},{"id":"https://openalex.org/C200601418","wikidata":"https://www.wikidata.org/wiki/Q2193887","display_name":"Reliability engineering","level":1,"score":0.23092129826545715},{"id":"https://openalex.org/C21442007","wikidata":"https://www.wikidata.org/wiki/Q1027879","display_name":"Graphics","level":2,"score":0.20114687085151672},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.16693121194839478},{"id":"https://openalex.org/C163258240","wikidata":"https://www.wikidata.org/wiki/Q25342","display_name":"Power (physics)","level":2,"score":0.13442721962928772},{"id":"https://openalex.org/C107094494","wikidata":"https://www.wikidata.org/wiki/Q428453","display_name":"Fault tree analysis","level":2,"score":0.0},{"id":"https://openalex.org/C21547014","wikidata":"https://www.wikidata.org/wiki/Q1423657","display_name":"Operations management","level":1,"score":0.0},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.0},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0},{"id":"https://openalex.org/C180747234","wikidata":"https://www.wikidata.org/wiki/Q23373","display_name":"Cognitive psychology","level":1,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C15744967","wikidata":"https://www.wikidata.org/wiki/Q9418","display_name":"Psychology","level":0,"score":0.0},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/smacd.2019.8795243","is_oa":false,"landing_page_url":"https://doi.org/10.1109/smacd.2019.8795243","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2019 16th International Conference on Synthesis, Modeling, Analysis and Simulation Methods and Applications to Circuit Design (SMACD)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":7,"referenced_works":["https://openalex.org/W1890614305","https://openalex.org/W1966243865","https://openalex.org/W2091664166","https://openalex.org/W2150851481","https://openalex.org/W2323693848","https://openalex.org/W4241472700","https://openalex.org/W6639550676"],"related_works":["https://openalex.org/W2151046618","https://openalex.org/W1972148443","https://openalex.org/W1969233021","https://openalex.org/W2167646277","https://openalex.org/W2063573318","https://openalex.org/W2027443981","https://openalex.org/W2388314963","https://openalex.org/W3158047141","https://openalex.org/W1656096860","https://openalex.org/W2360624069"],"abstract_inverted_index":{"This":[0],"paper":[1],"proposes":[2],"long-term":[3,17,116,150],"reliability":[4,53,67],"management":[5],"for":[6,39,98],"spatial":[7,76,99,136],"multitasking":[8,100,137],"GPU":[9,44,62,70,80,106],"architectures.":[10,45],"Specifically,":[11],"we":[12,72],"focus":[13,74],"on":[14,75],"electromigration":[15],"(EM)-induced":[16],"failure":[18],"of":[19,43,135],"the":[20,57,61,92],"GPU's":[21],"power":[22,27],"delivery":[23,28],"network.":[24],"A":[25],"distributed":[26],"network":[29],"model":[30,54],"at":[31,60],"functional":[32],"unit":[33],"granularity":[34],"is":[35,101],"developed":[36],"and":[37,55,113,123,138],"used":[38],"our":[40,141],"EM":[41,52],"analysis":[42],"We":[46,89,111,130],"use":[47,133],"a":[48,66,115],"recently":[49],"proposed":[50,142],"physics-based":[51],"consider":[56],"EM-induced":[58],"time-to-failure":[59],"system":[63],"level":[64],"as":[65],"resource.":[68],"For":[69],"scheduling,":[71],"mainly":[73],"multitasking,":[77],"which":[78],"allows":[79],"computing":[81],"resources":[82],"to":[83,146],"be":[84],"partitioned":[85],"among":[86],"multiple":[87],"applications.":[88],"find":[90,139],"that":[91,140],"existing":[93,127],"reliability-agnostic":[94,128],"thread":[95,118],"block":[96,119],"scheduler":[97,120,143],"effective":[102],"in":[103,121,149],"achieving":[104],"high":[105],"utilization,":[107],"but":[108],"poor":[109],"reliability.":[110,151],"develop":[112],"implement":[114],"reliability-aware":[117],"GPGPU-Sim,":[122],"compare":[124],"it":[125],"against":[126],"scheduler.":[129],"evaluate":[131],"several":[132],"cases":[134],"achieves":[144],"up":[145],"30%":[147],"improvement":[148]},"counts_by_year":[],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
