{"id":"https://openalex.org/W4401336195","doi":"https://doi.org/10.1109/iolts60994.2024.10616087","title":"Effective Application-level Error Modeling of Permanent Faults on AI Accelerators","display_name":"Effective Application-level Error Modeling of Permanent Faults on AI Accelerators","publication_year":2024,"publication_date":"2024-07-03","ids":{"openalex":"https://openalex.org/W4401336195","doi":"https://doi.org/10.1109/iolts60994.2024.10616087"},"language":"en","primary_location":{"id":"doi:10.1109/iolts60994.2024.10616087","is_oa":false,"landing_page_url":"https://doi.org/10.1109/iolts60994.2024.10616087","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2024 IEEE 30th International Symposium on On-Line Testing and Robust System Design (IOLTS)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5098932940","display_name":"Francesco Pessia","orcid":null},"institutions":[{"id":"https://openalex.org/I177477856","display_name":"Polytechnic University of Turin","ror":"https://ror.org/00bgk9508","country_code":"IT","type":"education","lineage":["https://openalex.org/I177477856"]}],"countries":["IT"],"is_corresponding":true,"raw_author_name":"Francesco Pessia","raw_affiliation_strings":["Politecnico di Torino,Department of Control and Computer Engineering (DAUIN),Turin,Italy"],"affiliations":[{"raw_affiliation_string":"Politecnico di Torino,Department of Control and Computer Engineering (DAUIN),Turin,Italy","institution_ids":["https://openalex.org/I177477856"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5046650816","display_name":"Juan-David Guerrero-Balaguera","orcid":"https://orcid.org/0000-0001-6852-2372"},"institutions":[{"id":"https://openalex.org/I177477856","display_name":"Polytechnic University of Turin","ror":"https://ror.org/00bgk9508","country_code":"IT","type":"education","lineage":["https://openalex.org/I177477856"]}],"countries":["IT"],"is_corresponding":false,"raw_author_name":"Juan-David Guerrero-Balaguera","raw_affiliation_strings":["Politecnico di Torino,Department of Control and Computer Engineering (DAUIN),Turin,Italy"],"affiliations":[{"raw_affiliation_string":"Politecnico di Torino,Department of Control and Computer Engineering (DAUIN),Turin,Italy","institution_ids":["https://openalex.org/I177477856"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5006252702","display_name":"Robert Limas Sierra","orcid":"https://orcid.org/0000-0001-5206-3757"},"institutions":[{"id":"https://openalex.org/I177477856","display_name":"Polytechnic University of Turin","ror":"https://ror.org/00bgk9508","country_code":"IT","type":"education","lineage":["https://openalex.org/I177477856"]}],"countries":["IT"],"is_corresponding":false,"raw_author_name":"Robert Limas Sierra","raw_affiliation_strings":["Politecnico di Torino,Department of Control and Computer Engineering (DAUIN),Turin,Italy"],"affiliations":[{"raw_affiliation_string":"Politecnico di Torino,Department of Control and Computer Engineering (DAUIN),Turin,Italy","institution_ids":["https://openalex.org/I177477856"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5107558399","display_name":"Josie E. Rodriguez Condia","orcid":"https://orcid.org/0000-0001-5957-5624"},"institutions":[{"id":"https://openalex.org/I177477856","display_name":"Polytechnic University of Turin","ror":"https://ror.org/00bgk9508","country_code":"IT","type":"education","lineage":["https://openalex.org/I177477856"]}],"countries":["IT"],"is_corresponding":false,"raw_author_name":"Josie E. Rodriguez Condia","raw_affiliation_strings":["Politecnico di Torino,Department of Control and Computer Engineering (DAUIN),Turin,Italy"],"affiliations":[{"raw_affiliation_string":"Politecnico di Torino,Department of Control and Computer Engineering (DAUIN),Turin,Italy","institution_ids":["https://openalex.org/I177477856"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5051196445","display_name":"Marco Levorato","orcid":"https://orcid.org/0000-0002-6920-4189"},"institutions":[{"id":"https://openalex.org/I204250578","display_name":"University of California, Irvine","ror":"https://ror.org/04gyf1771","country_code":"US","type":"education","lineage":["https://openalex.org/I204250578"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Marco Levorato","raw_affiliation_strings":["University of California,Computer Science Department,Irvine,US"],"affiliations":[{"raw_affiliation_string":"University of California,Computer Science Department,Irvine,US","institution_ids":["https://openalex.org/I204250578"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5058555274","display_name":"M. Sonza Reorda","orcid":"https://orcid.org/0000-0003-2899-7669"},"institutions":[{"id":"https://openalex.org/I177477856","display_name":"Polytechnic University of Turin","ror":"https://ror.org/00bgk9508","country_code":"IT","type":"education","lineage":["https://openalex.org/I177477856"]}],"countries":["IT"],"is_corresponding":false,"raw_author_name":"Matteo Sonza Reorda","raw_affiliation_strings":["Politecnico di Torino,Department of Control and Computer Engineering (DAUIN),Turin,Italy"],"affiliations":[{"raw_affiliation_string":"Politecnico di Torino,Department of Control and Computer Engineering (DAUIN),Turin,Italy","institution_ids":["https://openalex.org/I177477856"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5098932940"],"corresponding_institution_ids":["https://openalex.org/I177477856"],"apc_list":null,"apc_paid":null,"fwci":0.6989,"has_fulltext":false,"cited_by_count":2,"citation_normalized_percentile":{"value":0.69834871,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":95,"max":96},"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"7"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10876","display_name":"Fault Detection and Control Systems","score":0.9961000084877014,"subfield":{"id":"https://openalex.org/subfields/2207","display_name":"Control and Systems Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10876","display_name":"Fault Detection and Control Systems","score":0.9961000084877014,"subfield":{"id":"https://openalex.org/subfields/2207","display_name":"Control and Systems Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T13293","display_name":"Engineering and Test Systems","score":0.9649999737739563,"subfield":{"id":"https://openalex.org/subfields/2207","display_name":"Control and Systems Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12111","display_name":"Industrial Vision Systems and Defect Detection","score":0.9628000259399414,"subfield":{"id":"https://openalex.org/subfields/2209","display_name":"Industrial and Manufacturing Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6925768256187439},{"id":"https://openalex.org/keywords/reliability-engineering","display_name":"Reliability engineering","score":0.43814441561698914},{"id":"https://openalex.org/keywords/engineering","display_name":"Engineering","score":0.12460410594940186}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6925768256187439},{"id":"https://openalex.org/C200601418","wikidata":"https://www.wikidata.org/wiki/Q2193887","display_name":"Reliability engineering","level":1,"score":0.43814441561698914},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.12460410594940186}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/iolts60994.2024.10616087","is_oa":false,"landing_page_url":"https://doi.org/10.1109/iolts60994.2024.10616087","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2024 IEEE 30th International Symposium on On-Line Testing and Robust System Design (IOLTS)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"score":0.6800000071525574,"display_name":"Climate action","id":"https://metadata.un.org/sdg/13"}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W4391375266","https://openalex.org/W2748952813","https://openalex.org/W2390279801","https://openalex.org/W2358668433","https://openalex.org/W4396701345","https://openalex.org/W2376932109","https://openalex.org/W2001405890","https://openalex.org/W4396696052","https://openalex.org/W2382290278","https://openalex.org/W4395014643"],"abstract_inverted_index":{"The":[0],"deployment":[1],"of":[2,79,101,150,158],"Machine":[3],"Learning":[4],"(ML)":[5],"applications":[6,103,126],"extensively":[7],"leverages":[8],"Matrix":[9],"Multiplication":[10],"(MM)":[11],"operations":[12],"on":[13,145,197],"modern":[14],"and":[15,69,72,88,193],"advanced":[16],"accelerators,":[17],"like":[18,127],"Graphic":[19],"Processing":[20],"Units":[21,27],"(GPUs),":[22],"which":[23],"employ":[24],"Tensor":[25],"Core":[26],"(TCUs)":[28],"to":[29,65,84,138,169,184],"optimize":[30],"MM\u2019s":[31],"execution":[32],"efficiently.":[33],"However,":[34],"reliability":[35,99,156],"concerns":[36],"arise":[37],"in":[38,164],"devices":[39],"with":[40,172],"cutting-edge":[41],"semiconductor":[42],"technologies":[43],"(7":[44],"nm":[45],"or":[46],"less),":[47],"as":[48],"faults":[49,81,106,144],"can":[50,63],"compromise":[51],"some":[52],"structures":[53],"(e.g.,":[54,95],"TCUs)":[55],"during":[56,147],"their":[57],"operation.":[58],"In":[59,129],"safety-critical":[60],"applications,":[61],"this":[62,130],"lead":[64],"wrong":[66],"DNN":[67],"outcomes":[68],"cause":[70],"unpredictable":[71],"unacceptable":[73],"actions.":[74],"Thus,":[75],"the":[76,91,98,148],"impact":[77],"evaluation":[78],"such":[80],"is":[82],"crucial":[83],"ensure":[85],"that":[86],"TCUs":[87],"GPUs":[89,121],"meet":[90],"safety":[92],"standard":[93],"requirements":[94],"ISO26262).":[96],"Currently,":[97],"assessment":[100],"complex":[102],"concerning":[104],"hardware":[105],"involves":[107],"fault":[108],"injection":[109],"(FI)":[110],"campaigns.":[111],"Unfortunately,":[112],"low-level":[113],"FI":[114,194],"campaigns":[115,195],"might":[116],"be":[117],"computationally":[118,159],"prohibitive":[119],"for":[120],"when":[122],"these":[123],"execute":[124],"massive":[125],"DNNs.":[128],"work,":[131],"we":[132],"propose":[133],"an":[134],"error":[135,190],"modeling":[136,191],"approach":[137,153,192],"accurately":[139],"describe":[140],"corruptions":[141],"from":[142],"permanent":[143],"TCUs,":[146],"operation":[149],"MMs.":[151],"This":[152],"enables":[154],"realistic":[155],"evaluations":[157],"expensive":[160],"MM-based":[161],"workloads,":[162],"resulting":[163],"a":[165,179],"huge":[166],"acceleration":[167],"(up":[168,183],"225X)":[170],"compared":[171],"hardware-level":[173],"FIs.":[174],"Our":[175],"experimental":[176],"results":[177],"show":[178],"very":[180],"good":[181],"accuracy":[182],"$93":[185],"\\%$":[186],"correlation":[187],"between":[188],"our":[189],"conducted":[196],"TCUs).":[198]},"counts_by_year":[{"year":2025,"cited_by_count":2}],"updated_date":"2025-12-26T23:08:49.675405","created_date":"2025-10-10T00:00:00"}
