{"id":"https://openalex.org/W4408564311","doi":"https://doi.org/10.1109/tpds.2025.3552679","title":"GEREM: Fast and Precise Error Resilience Assessment for GPU Microarchitectures","display_name":"GEREM: Fast and Precise Error Resilience Assessment for GPU Microarchitectures","publication_year":2025,"publication_date":"2025-03-18","ids":{"openalex":"https://openalex.org/W4408564311","doi":"https://doi.org/10.1109/tpds.2025.3552679"},"language":"en","primary_location":{"id":"doi:10.1109/tpds.2025.3552679","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tpds.2025.3552679","pdf_url":null,"source":{"id":"https://openalex.org/S97130795","display_name":"IEEE Transactions on Parallel and Distributed Systems","issn_l":"1045-9219","issn":["1045-9219","1558-2183","2161-9883"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Parallel and Distributed Systems","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5006875686","display_name":"Jingweijia Tan","orcid":"https://orcid.org/0000-0001-8256-7585"},"institutions":[{"id":"https://openalex.org/I194450716","display_name":"Jilin University","ror":"https://ror.org/00js3aw79","country_code":"CN","type":"education","lineage":["https://openalex.org/I194450716"]},{"id":"https://openalex.org/I4210134929","display_name":"Jilin Province Science and Technology Department","ror":"https://ror.org/049x38272","country_code":"CN","type":"government","lineage":["https://openalex.org/I4210134929"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Jingweijia Tan","raw_affiliation_strings":["College of Computer Science and Technology, Jilin University, Changchun, China"],"affiliations":[{"raw_affiliation_string":"College of Computer Science and Technology, Jilin University, Changchun, China","institution_ids":["https://openalex.org/I4210134929","https://openalex.org/I194450716"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5031781862","display_name":"Xinjun Li","orcid":"https://orcid.org/0000-0003-0220-9003"},"institutions":[{"id":"https://openalex.org/I4210134929","display_name":"Jilin Province Science and Technology Department","ror":"https://ror.org/049x38272","country_code":"CN","type":"government","lineage":["https://openalex.org/I4210134929"]},{"id":"https://openalex.org/I194450716","display_name":"Jilin University","ror":"https://ror.org/00js3aw79","country_code":"CN","type":"education","lineage":["https://openalex.org/I194450716"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xurui Li","raw_affiliation_strings":["College of Computer Science and Technology, Jilin University, Changchun, China"],"affiliations":[{"raw_affiliation_string":"College of Computer Science and Technology, Jilin University, Changchun, China","institution_ids":["https://openalex.org/I4210134929","https://openalex.org/I194450716"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5116677744","display_name":"An Zhong","orcid":null},"institutions":[{"id":"https://openalex.org/I4210134929","display_name":"Jilin Province Science and Technology Department","ror":"https://ror.org/049x38272","country_code":"CN","type":"government","lineage":["https://openalex.org/I4210134929"]},{"id":"https://openalex.org/I194450716","display_name":"Jilin University","ror":"https://ror.org/00js3aw79","country_code":"CN","type":"education","lineage":["https://openalex.org/I194450716"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"An Zhong","raw_affiliation_strings":["College of Computer Science and Technology, Jilin University, Changchun, China"],"affiliations":[{"raw_affiliation_string":"College of Computer Science and Technology, Jilin University, Changchun, China","institution_ids":["https://openalex.org/I4210134929","https://openalex.org/I194450716"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5007584180","display_name":"Kaige Yan","orcid":"https://orcid.org/0000-0001-9154-3294"},"institutions":[{"id":"https://openalex.org/I194450716","display_name":"Jilin University","ror":"https://ror.org/00js3aw79","country_code":"CN","type":"education","lineage":["https://openalex.org/I194450716"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Kaige Yan","raw_affiliation_strings":["College of Communication Engineering, Jilin University, Changchun, China"],"affiliations":[{"raw_affiliation_string":"College of Communication Engineering, Jilin University, Changchun, China","institution_ids":["https://openalex.org/I194450716"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101467860","display_name":"Xiaohui Wei","orcid":"https://orcid.org/0000-0001-5597-3625"},"institutions":[{"id":"https://openalex.org/I194450716","display_name":"Jilin University","ror":"https://ror.org/00js3aw79","country_code":"CN","type":"education","lineage":["https://openalex.org/I194450716"]},{"id":"https://openalex.org/I4210134929","display_name":"Jilin Province Science and Technology Department","ror":"https://ror.org/049x38272","country_code":"CN","type":"government","lineage":["https://openalex.org/I4210134929"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xiaohui Wei","raw_affiliation_strings":["College of Computer Science and Technology, Jilin University, Changchun, China"],"affiliations":[{"raw_affiliation_string":"College of Computer Science and Technology, Jilin University, Changchun, China","institution_ids":["https://openalex.org/I4210134929","https://openalex.org/I194450716"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5025726299","display_name":"Guanpeng Li","orcid":"https://orcid.org/0000-0001-7773-7826"},"institutions":[{"id":"https://openalex.org/I126307644","display_name":"University of Iowa","ror":"https://ror.org/036jqmy94","country_code":"US","type":"education","lineage":["https://openalex.org/I126307644"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Guanpeng Li","raw_affiliation_strings":["Computer Science Department, University of Iowa, Iowa City, IA, USA"],"affiliations":[{"raw_affiliation_string":"Computer Science Department, University of Iowa, Iowa City, IA, USA","institution_ids":["https://openalex.org/I126307644"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5006875686"],"corresponding_institution_ids":["https://openalex.org/I194450716","https://openalex.org/I4210134929"],"apc_list":null,"apc_paid":null,"fwci":0.7467,"has_fulltext":false,"cited_by_count":1,"citation_normalized_percentile":{"value":0.69572294,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":91,"max":95},"biblio":{"volume":"36","issue":"5","first_page":"1011","last_page":"1024"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11005","display_name":"Radiation Effects in Electronics","score":0.9947999715805054,"subfield":{"id":"https://openalex.org/subfields/2208","display_name":"Electrical and Electronic Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11005","display_name":"Radiation Effects in Electronics","score":0.9947999715805054,"subfield":{"id":"https://openalex.org/subfields/2208","display_name":"Electrical and Electronic Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10876","display_name":"Fault Detection and Control Systems","score":0.9567000269889832,"subfield":{"id":"https://openalex.org/subfields/2207","display_name":"Control and Systems Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10472","display_name":"Semiconductor materials and devices","score":0.954800009727478,"subfield":{"id":"https://openalex.org/subfields/2208","display_name":"Electrical and Electronic Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.859180212020874},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.7045440077781677},{"id":"https://openalex.org/keywords/resilience","display_name":"Resilience (materials science)","score":0.6440055966377258},{"id":"https://openalex.org/keywords/general-purpose-computing-on-graphics-processing-units","display_name":"General-purpose computing on graphics processing units","score":0.4464086890220642},{"id":"https://openalex.org/keywords/computer-architecture","display_name":"Computer architecture","score":0.4062720239162445},{"id":"https://openalex.org/keywords/computer-graphics","display_name":"Computer graphics (images)","score":0.21917393803596497},{"id":"https://openalex.org/keywords/graphics","display_name":"Graphics","score":0.20194846391677856}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.859180212020874},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.7045440077781677},{"id":"https://openalex.org/C2779585090","wikidata":"https://www.wikidata.org/wiki/Q3457762","display_name":"Resilience (materials science)","level":2,"score":0.6440055966377258},{"id":"https://openalex.org/C50630238","wikidata":"https://www.wikidata.org/wiki/Q971505","display_name":"General-purpose computing on graphics processing units","level":3,"score":0.4464086890220642},{"id":"https://openalex.org/C118524514","wikidata":"https://www.wikidata.org/wiki/Q173212","display_name":"Computer architecture","level":1,"score":0.4062720239162445},{"id":"https://openalex.org/C121684516","wikidata":"https://www.wikidata.org/wiki/Q7600677","display_name":"Computer graphics (images)","level":1,"score":0.21917393803596497},{"id":"https://openalex.org/C21442007","wikidata":"https://www.wikidata.org/wiki/Q1027879","display_name":"Graphics","level":2,"score":0.20194846391677856},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C97355855","wikidata":"https://www.wikidata.org/wiki/Q11473","display_name":"Thermodynamics","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/tpds.2025.3552679","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tpds.2025.3552679","pdf_url":null,"source":{"id":"https://openalex.org/S97130795","display_name":"IEEE Transactions on Parallel and Distributed Systems","issn_l":"1045-9219","issn":["1045-9219","1558-2183","2161-9883"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Parallel and Distributed Systems","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[{"score":0.4000000059604645,"display_name":"Climate action","id":"https://metadata.un.org/sdg/13"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":41,"referenced_works":["https://openalex.org/W1979527452","https://openalex.org/W2027716782","https://openalex.org/W2031533321","https://openalex.org/W2080592089","https://openalex.org/W2128120785","https://openalex.org/W2411279070","https://openalex.org/W2593357442","https://openalex.org/W2612733213","https://openalex.org/W2626574314","https://openalex.org/W2735162286","https://openalex.org/W2751243270","https://openalex.org/W2902078285","https://openalex.org/W2905075051","https://openalex.org/W2945222589","https://openalex.org/W2983012040","https://openalex.org/W2991943931","https://openalex.org/W3012032150","https://openalex.org/W3013374519","https://openalex.org/W3016875561","https://openalex.org/W3022067362","https://openalex.org/W3031001329","https://openalex.org/W3035953471","https://openalex.org/W3042418709","https://openalex.org/W3088770438","https://openalex.org/W3102510044","https://openalex.org/W3120587606","https://openalex.org/W3129298663","https://openalex.org/W3149134903","https://openalex.org/W3187552919","https://openalex.org/W3189398641","https://openalex.org/W3196044848","https://openalex.org/W3211099997","https://openalex.org/W4232172996","https://openalex.org/W4244241786","https://openalex.org/W4249144718","https://openalex.org/W4283689961","https://openalex.org/W4308083843","https://openalex.org/W4360831835","https://openalex.org/W4385236993","https://openalex.org/W4388505273","https://openalex.org/W6857540570"],"related_works":["https://openalex.org/W4391375266","https://openalex.org/W2899084033","https://openalex.org/W2748952813","https://openalex.org/W2505380084","https://openalex.org/W2390279801","https://openalex.org/W4400333498","https://openalex.org/W4391913857","https://openalex.org/W2358668433","https://openalex.org/W4396701345","https://openalex.org/W1980160788"],"abstract_inverted_index":{"GPUs":[0,19,35],"are":[1,20,36,47,55,130,141,153,208],"widely":[2],"used":[3,49,97,210,223],"hardware":[4,24,40,117],"acceleration":[5],"platforms":[6],"in":[7,26],"many":[8],"areas":[9],"due":[10],"to":[11,22,71,79,98,199,211],"their":[12],"great":[13],"computational":[14],"throughput.":[15],"In":[16,62],"the":[17,27,31,81,101,113,167,182,196,232],"meanwhile,":[18],"vulnerable":[21],"transient":[23],"faults":[25,118,194],"post-Moore":[28],"era.":[29],"Analyzing":[30],"error":[32,51,82,233],"resilience":[33,52,83,234],"of":[34,84,104,116,134,144,235],"critical":[37],"for":[38,50,137,186,217,224],"both":[39],"and":[41,88],"software.":[42],"Statistical":[43],"fault":[44,74,93,172,214,248],"injection":[45,75],"approaches":[46],"commonly":[48],"analysis,":[53],"which":[54],"highly":[56],"accurate":[57],"but":[58],"very":[59],"time":[60],"consuming.":[61],"this":[63,109],"work,":[64],"we":[65,111,160],"propose":[66],"GEREM,":[67],"a":[68],"first":[69,180],"framework":[70,179],"speed":[72],"up":[73],"process":[76,170],"so":[77,159],"as":[78],"estimate":[80],"GPU":[85,120,236],"microarchitectures":[86],"swiftly":[87],"precisely.":[89],"We":[90,148],"find":[91],"early":[92,114,131],"behaviors":[94,115],"can":[95,161],"be":[96],"accurately":[99],"predict":[100,212],"final":[102,213],"outcomes":[103],"program":[105,168],"execution.":[106],"Based":[107],"on":[108,243],"observation,":[110],"categorize":[112],"into":[119,195],"Early":[121],"Fault":[122],"Manifestation":[123],"models":[124],"(EFMs).":[125],"For":[126,203],"data":[127,204],"structures,":[128,206],"EFMs":[129,140,152,187,207],"propagation":[132],"characteristics":[133],"faults,":[135],"while":[136,216],"pipeline":[138,218],"instructions,":[139,219],"heuristic":[142],"properties":[143],"several":[145],"instruction":[146],"contexts.":[147],"further":[149],"observe":[150],"that":[151],"determined":[154],"by":[155],"static":[156],"microarchitecture":[157,237],"states,":[158],"capture":[162],"them":[163],"without":[164],"actually":[165],"simulating":[166],"execution":[169],"under":[171],"injections.":[173,249],"Leveraging":[174],"these":[175],"observations,":[176],"our":[177],"GEREM":[178,229],"profiles":[181],"microarchitectural":[183],"states":[184],"related":[185],"at":[188],"one":[189],"time.":[190],"It":[191],"then":[192],"injects":[193],"profiled":[197],"traces":[198],"immediately":[200],"generate":[201],"EFMs.":[202],"storage":[205],"directly":[209],"outcomes,":[215],"machine":[220],"learning":[221],"is":[222],"prediction.":[225],"Evaluation":[226],"results":[227],"show":[228],"precisely":[230],"assesses":[231],"structures":[238],"with":[239,246],"<inline-formula><tex-math":[240],"notation=\"LaTeX\">$237\\times$</tex-math></inline-formula>":[241],"speedup":[242],"average":[244],"comparing":[245],"traditional":[247]},"counts_by_year":[{"year":2025,"cited_by_count":1}],"updated_date":"2025-12-22T23:10:17.713674","created_date":"2025-10-10T00:00:00"}
