{"id":"https://openalex.org/W7116789959","doi":"https://doi.org/10.1109/mm.2025.3645670","title":"Silent Data Corruption in Artificial Intelligence: A Growing Challenge for Large-Scale Machine Learning","display_name":"Silent Data Corruption in Artificial Intelligence: A Growing Challenge for Large-Scale Machine Learning","publication_year":2025,"publication_date":"2025-12-22","ids":{"openalex":"https://openalex.org/W7116789959","doi":"https://doi.org/10.1109/mm.2025.3645670"},"language":null,"primary_location":{"id":"doi:10.1109/mm.2025.3645670","is_oa":false,"landing_page_url":"https://doi.org/10.1109/mm.2025.3645670","pdf_url":null,"source":{"id":"https://openalex.org/S59697426","display_name":"IEEE Micro","issn_l":"0272-1732","issn":["0272-1732","1937-4143"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Micro","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5049966217","display_name":"Nishant George","orcid":null},"institutions":[{"id":"https://openalex.org/I4210127875","display_name":"Nvidia (United States)","ror":"https://ror.org/03jdj4y14","country_code":"US","type":"company","lineage":["https://openalex.org/I4210127875"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Nishant George","raw_affiliation_strings":["NVIDIA, Austin, TX, USA"],"raw_orcid":"https://orcid.org/0009-0009-1041-2117","affiliations":[{"raw_affiliation_string":"NVIDIA, Austin, TX, USA","institution_ids":["https://openalex.org/I4210127875"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5078748445","display_name":"Sudhanva Gurumurthi","orcid":"https://orcid.org/0000-0002-1740-7304"},"institutions":[{"id":"https://openalex.org/I4210137977","display_name":"Advanced Micro Devices (United States)","ror":"https://ror.org/04kd6c783","country_code":"US","type":"company","lineage":["https://openalex.org/I4210137977"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Sudhanva Gurumurthi","raw_affiliation_strings":["AMD, Austin, TX, USA"],"raw_orcid":"https://orcid.org/0000-0002-1740-7304","affiliations":[{"raw_affiliation_string":"AMD, Austin, TX, USA","institution_ids":["https://openalex.org/I4210137977"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5061044305","display_name":"Vilas Sridharan","orcid":"https://orcid.org/0000-0002-2944-2799"},"institutions":[{"id":"https://openalex.org/I4210137977","display_name":"Advanced Micro Devices (United States)","ror":"https://ror.org/04kd6c783","country_code":"US","type":"company","lineage":["https://openalex.org/I4210137977"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Vilas Sridharan","raw_affiliation_strings":["AMD, Austin, TX, USA"],"raw_orcid":"https://orcid.org/0000-0002-2944-2799","affiliations":[{"raw_affiliation_string":"AMD, Austin, TX, USA","institution_ids":["https://openalex.org/I4210137977"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5018336899","display_name":"Harish Dattatraya Dixit","orcid":"https://orcid.org/0009-0001-1163-5568"},"institutions":[{"id":"https://openalex.org/I4210099336","display_name":"Menlo School","ror":"https://ror.org/01240pn49","country_code":"US","type":"education","lineage":["https://openalex.org/I4210099336"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Harish Dattatraya Dixit","raw_affiliation_strings":["Meta, Menlo Park, CA, USA"],"raw_orcid":"https://orcid.org/0009-0001-1163-5568","affiliations":[{"raw_affiliation_string":"Meta, Menlo Park, CA, USA","institution_ids":["https://openalex.org/I4210099336"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5120972261","display_name":"Emel Goksu","orcid":null},"institutions":[{"id":"https://openalex.org/I4210099336","display_name":"Menlo School","ror":"https://ror.org/01240pn49","country_code":"US","type":"education","lineage":["https://openalex.org/I4210099336"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Emel Goksu","raw_affiliation_strings":["Meta, Menlo Park, CA, USA"],"raw_orcid":"https://orcid.org/0009-0006-4445-6976","affiliations":[{"raw_affiliation_string":"Meta, Menlo Park, CA, USA","institution_ids":["https://openalex.org/I4210099336"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5121001140","display_name":"Bharath Parthasarathy","orcid":null},"institutions":[{"id":"https://openalex.org/I1291425158","display_name":"Google (United States)","ror":"https://ror.org/00njsd438","country_code":"US","type":"company","lineage":["https://openalex.org/I1291425158","https://openalex.org/I4210128969"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Bharath Parthasarathy","raw_affiliation_strings":["Google, Mountain View, CA, USA"],"raw_orcid":"https://orcid.org/0009-0008-3566-4591","affiliations":[{"raw_affiliation_string":"Google, Mountain View, CA, USA","institution_ids":["https://openalex.org/I1291425158"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5043001710","display_name":"Amber Huffman","orcid":null},"institutions":[{"id":"https://openalex.org/I1291425158","display_name":"Google (United States)","ror":"https://ror.org/00njsd438","country_code":"US","type":"company","lineage":["https://openalex.org/I1291425158","https://openalex.org/I4210128969"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Amber Huffman","raw_affiliation_strings":["Google, Mountain View, CA, USA"],"raw_orcid":"https://orcid.org/0009-0004-5364-3998","affiliations":[{"raw_affiliation_string":"Google, Mountain View, CA, USA","institution_ids":["https://openalex.org/I1291425158"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5040827902","display_name":"Thiago J. Macieira","orcid":null},"institutions":[{"id":"https://openalex.org/I1343180700","display_name":"Intel (United States)","ror":"https://ror.org/01ek73717","country_code":"US","type":"company","lineage":["https://openalex.org/I1343180700"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Thiago Macieira","raw_affiliation_strings":["Intel, Hillsboro, OR, USA"],"raw_orcid":"https://orcid.org/0009-0006-2739-614X","affiliations":[{"raw_affiliation_string":"Intel, Hillsboro, OR, USA","institution_ids":["https://openalex.org/I1343180700"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101770488","display_name":"Arani Sinha","orcid":"https://orcid.org/0000-0003-2069-3177"},"institutions":[{"id":"https://openalex.org/I1343180700","display_name":"Intel (United States)","ror":"https://ror.org/01ek73717","country_code":"US","type":"company","lineage":["https://openalex.org/I1343180700"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Arani Sinha","raw_affiliation_strings":["Intel, Hillsboro, OR, USA"],"raw_orcid":"https://orcid.org/0000-0003-2069-3177","affiliations":[{"raw_affiliation_string":"Intel, Hillsboro, OR, USA","institution_ids":["https://openalex.org/I1343180700"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5055871546","display_name":"Dean Liberty","orcid":null},"institutions":[{"id":"https://openalex.org/I4210156213","display_name":"American Rock Mechanics Association","ror":"https://ror.org/05vfrxy92","country_code":"US","type":"nonprofit","lineage":["https://openalex.org/I4210156213"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Dean Liberty","raw_affiliation_strings":["ARM, Austin, TX, USA"],"raw_orcid":"https://orcid.org/0009-0002-2182-1548","affiliations":[{"raw_affiliation_string":"ARM, Austin, TX, USA","institution_ids":["https://openalex.org/I4210156213"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5120957250","display_name":"Lisa Minwell","orcid":null},"institutions":[{"id":"https://openalex.org/I4210156213","display_name":"American Rock Mechanics Association","ror":"https://ror.org/05vfrxy92","country_code":"US","type":"nonprofit","lineage":["https://openalex.org/I4210156213"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Lisa Minwell","raw_affiliation_strings":["ARM, Austin, TX, USA"],"raw_orcid":"https://orcid.org/0009-0007-9122-7137","affiliations":[{"raw_affiliation_string":"ARM, Austin, TX, USA","institution_ids":["https://openalex.org/I4210156213"]}]},{"author_position":"last","author":{"id":null,"display_name":"Robert S. Chappell","orcid":"https://orcid.org/0009-0002-2024-3467"},"institutions":[{"id":"https://openalex.org/I1290206253","display_name":"Microsoft (United States)","ror":"https://ror.org/00d0nc645","country_code":"US","type":"company","lineage":["https://openalex.org/I1290206253"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Robert S. Chappell","raw_affiliation_strings":["Microsoft, Redmond, WA, USA"],"raw_orcid":"https://orcid.org/0009-0002-2024-3467","affiliations":[{"raw_affiliation_string":"Microsoft, Redmond, WA, USA","institution_ids":["https://openalex.org/I1290206253"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":12,"corresponding_author_ids":["https://openalex.org/A5049966217"],"corresponding_institution_ids":["https://openalex.org/I4210127875"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.54502252,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":"46","issue":"1","first_page":"66","last_page":"72"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11005","display_name":"Radiation Effects in Electronics","score":0.423799991607666,"subfield":{"id":"https://openalex.org/subfields/2208","display_name":"Electrical and Electronic Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11005","display_name":"Radiation Effects in Electronics","score":0.423799991607666,"subfield":{"id":"https://openalex.org/subfields/2208","display_name":"Electrical and Electronic Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10772","display_name":"Distributed systems and fault tolerance","score":0.21410000324249268,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11689","display_name":"Adversarial Robustness in Machine Learning","score":0.08590000122785568,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/correctness","display_name":"Correctness","score":0.6934999823570251},{"id":"https://openalex.org/keywords/reliability","display_name":"Reliability (semiconductor)","score":0.6340000033378601},{"id":"https://openalex.org/keywords/anomaly-detection","display_name":"Anomaly detection","score":0.5285999774932861},{"id":"https://openalex.org/keywords/artificial-neural-network","display_name":"Artificial neural network","score":0.4790000021457672},{"id":"https://openalex.org/keywords/trustworthiness","display_name":"Trustworthiness","score":0.4781000018119812},{"id":"https://openalex.org/keywords/deep-learning","display_name":"Deep learning","score":0.45329999923706055},{"id":"https://openalex.org/keywords/debugging","display_name":"Debugging","score":0.4499000012874603},{"id":"https://openalex.org/keywords/language-change","display_name":"Language change","score":0.4072999954223633}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8208000063896179},{"id":"https://openalex.org/C55439883","wikidata":"https://www.wikidata.org/wiki/Q360812","display_name":"Correctness","level":2,"score":0.6934999823570251},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.660099983215332},{"id":"https://openalex.org/C43214815","wikidata":"https://www.wikidata.org/wiki/Q7310987","display_name":"Reliability (semiconductor)","level":3,"score":0.6340000033378601},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6090999841690063},{"id":"https://openalex.org/C739882","wikidata":"https://www.wikidata.org/wiki/Q3560506","display_name":"Anomaly detection","level":2,"score":0.5285999774932861},{"id":"https://openalex.org/C38652104","wikidata":"https://www.wikidata.org/wiki/Q3510521","display_name":"Computer security","level":1,"score":0.48069998621940613},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.4790000021457672},{"id":"https://openalex.org/C153701036","wikidata":"https://www.wikidata.org/wiki/Q659974","display_name":"Trustworthiness","level":2,"score":0.4781000018119812},{"id":"https://openalex.org/C108583219","wikidata":"https://www.wikidata.org/wiki/Q197536","display_name":"Deep learning","level":2,"score":0.45329999923706055},{"id":"https://openalex.org/C168065819","wikidata":"https://www.wikidata.org/wiki/Q845566","display_name":"Debugging","level":2,"score":0.4499000012874603},{"id":"https://openalex.org/C2780027415","wikidata":"https://www.wikidata.org/wiki/Q524648","display_name":"Language change","level":2,"score":0.4072999954223633},{"id":"https://openalex.org/C2778755073","wikidata":"https://www.wikidata.org/wiki/Q10858537","display_name":"Scale (ratio)","level":2,"score":0.39340001344680786},{"id":"https://openalex.org/C81363708","wikidata":"https://www.wikidata.org/wiki/Q17084460","display_name":"Convolutional neural network","level":2,"score":0.35929998755455017},{"id":"https://openalex.org/C2777303404","wikidata":"https://www.wikidata.org/wiki/Q759757","display_name":"Convergence (economics)","level":2,"score":0.3249000012874603},{"id":"https://openalex.org/C2778464652","wikidata":"https://www.wikidata.org/wiki/Q309849","display_name":"Open research","level":2,"score":0.321399986743927},{"id":"https://openalex.org/C152745839","wikidata":"https://www.wikidata.org/wiki/Q5438153","display_name":"Fault detection and isolation","level":3,"score":0.32010000944137573},{"id":"https://openalex.org/C75684735","wikidata":"https://www.wikidata.org/wiki/Q858810","display_name":"Big data","level":2,"score":0.3179999887943268},{"id":"https://openalex.org/C63540848","wikidata":"https://www.wikidata.org/wiki/Q3140932","display_name":"Fault tolerance","level":2,"score":0.30709999799728394},{"id":"https://openalex.org/C2522767166","wikidata":"https://www.wikidata.org/wiki/Q2374463","display_name":"Data science","level":1,"score":0.2987000048160553},{"id":"https://openalex.org/C67186912","wikidata":"https://www.wikidata.org/wiki/Q367664","display_name":"Data modeling","level":2,"score":0.2847999930381775},{"id":"https://openalex.org/C175551986","wikidata":"https://www.wikidata.org/wiki/Q47089","display_name":"Fault (geology)","level":2,"score":0.2685000002384186},{"id":"https://openalex.org/C2780535194","wikidata":"https://www.wikidata.org/wiki/Q309901","display_name":"Open data","level":2,"score":0.26750001311302185},{"id":"https://openalex.org/C168021876","wikidata":"https://www.wikidata.org/wiki/Q1353446","display_name":"Byzantine fault tolerance","level":3,"score":0.2581999897956848},{"id":"https://openalex.org/C117447612","wikidata":"https://www.wikidata.org/wiki/Q1412670","display_name":"Software quality","level":4,"score":0.2531999945640564}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/mm.2025.3645670","is_oa":false,"landing_page_url":"https://doi.org/10.1109/mm.2025.3645670","pdf_url":null,"source":{"id":"https://openalex.org/S59697426","display_name":"IEEE Micro","issn_l":"0272-1732","issn":["0272-1732","1937-4143"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Micro","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[{"display_name":"Peace, Justice and strong institutions","id":"https://metadata.un.org/sdg/16","score":0.8147748112678528}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":18,"referenced_works":["https://openalex.org/W1584278023","https://openalex.org/W2989569745","https://openalex.org/W3000371584","https://openalex.org/W3096295711","https://openalex.org/W3171842021","https://openalex.org/W4249144718","https://openalex.org/W4287322404","https://openalex.org/W4380881139","https://openalex.org/W4386212580","https://openalex.org/W4401212149","https://openalex.org/W4401337812","https://openalex.org/W4404954359","https://openalex.org/W4408182455","https://openalex.org/W4408893795","https://openalex.org/W4409328058","https://openalex.org/W4410394392","https://openalex.org/W4410638920","https://openalex.org/W4411172883"],"related_works":[],"abstract_inverted_index":{"As":[0],"Artificial":[1],"Intelligence":[2],"(AI)":[3],"and":[4,13,36,111,123,127,132,138],"Machine":[5],"Learning":[6],"(ML)":[7],"scale":[8],"to":[9,34,61,135],"HPC-class":[10],"workloads,":[11],"reliability":[12,65],"data":[14],"integrity":[15],"emerge":[16],"as":[17],"critical":[18,48],"concerns.":[19],"Silent":[20],"Data":[21],"Corruption":[22],"(SDC),":[23],"or":[24,53,83],"hardware":[25,64,94],"faults":[26,46,72],"that":[27,58,108],"escape":[28],"detection,":[29,121],"poses":[30],"a":[31,86,105],"hidden":[32],"threat":[33],"training":[35],"inference.":[37],"While":[38],"neural":[39],"networks":[40],"can":[41],"sometimes":[42],"absorb":[43],"small":[44],"perturbations,":[45],"in":[47,56,88],"computations":[49],"may":[50],"derail":[51],"convergence":[52],"bias":[54],"results":[55],"ways":[57],"are":[59],"difficult":[60],"diagnose.":[62],"Traditional":[63],"metrics":[66,80],"rely":[67],"on":[68],"rates":[69],"of":[70,93],"device-level":[71],"but":[73],"do":[74],"not":[75],"align":[76],"with":[77],"AI":[78,140],"correctness":[79],"like":[81],"accuracy":[82],"loss,":[84],"leaving":[85],"gap":[87],"understanding":[89],"the":[90],"true":[91],"impact":[92],"errors.":[95],"At":[96],"datacenter":[97],"scale,":[98],"even":[99],"rare":[100],"per-device":[101],"events":[102],"accumulate":[103],"into":[104],"meaningful":[106],"risk":[107],"challenges":[109],"detection":[110],"recovery.":[112],"We":[113],"review":[114],"emerging":[115],"strategies,":[116],"including":[117],"proactive":[118],"screening,":[119],"anomaly":[120],"redundancy,":[122],"algorithm-based":[124],"fault":[125],"tolerance,":[126],"call":[128],"for":[129],"open":[130],"collaboration":[131],"standardized":[133],"benchmarks":[134],"ensure":[136],"resilient":[137],"trustworthy":[139],"systems.":[141]},"counts_by_year":[],"updated_date":"2026-03-27T05:58:40.876381","created_date":"2025-12-22T00:00:00"}
