{"id":"https://openalex.org/W4386212580","doi":"https://doi.org/10.1109/iolts59296.2023.10224872","title":"Keytone: Silent Data Corruptions at Scale","display_name":"Keytone: Silent Data Corruptions at Scale","publication_year":2023,"publication_date":"2023-07-03","ids":{"openalex":"https://openalex.org/W4386212580","doi":"https://doi.org/10.1109/iolts59296.2023.10224872"},"language":"en","primary_location":{"id":"doi:10.1109/iolts59296.2023.10224872","is_oa":true,"landing_page_url":"https://doi.org/10.1109/iolts59296.2023.10224872","pdf_url":"https://ieeexplore.ieee.org/ielx7/10224820/10224858/10224872.pdf","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2023 IEEE 29th International Symposium on On-Line Testing and Robust System Design (IOLTS)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://ieeexplore.ieee.org/ielx7/10224820/10224858/10224872.pdf","any_repository_has_fulltext":null},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5064882114","display_name":"Harish V. Dixit","orcid":"https://orcid.org/0000-0002-4381-7331"},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Harish Dixit","raw_affiliation_strings":["Meta"],"affiliations":[{"raw_affiliation_string":"Meta","institution_ids":[]}]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":1,"corresponding_author_ids":["https://openalex.org/A5064882114"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":6.4999,"has_fulltext":true,"cited_by_count":35,"citation_normalized_percentile":{"value":0.97103624,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":96,"max":100},"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"2"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10772","display_name":"Distributed systems and fault tolerance","score":0.9912999868392944,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10772","display_name":"Distributed systems and fault tolerance","score":0.9912999868392944,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10101","display_name":"Cloud Computing and Resource Management","score":0.9799000024795532,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11614","display_name":"Cloud Data Security Solutions","score":0.9661999940872192,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7683557271957397},{"id":"https://openalex.org/keywords/debugging","display_name":"Debugging","score":0.7677529454231262},{"id":"https://openalex.org/keywords/datapath","display_name":"Datapath","score":0.757684051990509},{"id":"https://openalex.org/keywords/software-bug","display_name":"Software bug","score":0.5384504795074463},{"id":"https://openalex.org/keywords/trace","display_name":"TRACE (psycholinguistics)","score":0.5262921452522278},{"id":"https://openalex.org/keywords/scale","display_name":"Scale (ratio)","score":0.5172622799873352},{"id":"https://openalex.org/keywords/software","display_name":"Software","score":0.48902472853660583},{"id":"https://openalex.org/keywords/fault-tolerance","display_name":"Fault tolerance","score":0.4410867989063263},{"id":"https://openalex.org/keywords/embedded-system","display_name":"Embedded system","score":0.44107741117477417},{"id":"https://openalex.org/keywords/subroutine","display_name":"Subroutine","score":0.4325621426105499},{"id":"https://openalex.org/keywords/reliability-engineering","display_name":"Reliability engineering","score":0.3610503077507019},{"id":"https://openalex.org/keywords/real-time-computing","display_name":"Real-time computing","score":0.323733925819397},{"id":"https://openalex.org/keywords/distributed-computing","display_name":"Distributed computing","score":0.30939608812332153},{"id":"https://openalex.org/keywords/operating-system","display_name":"Operating system","score":0.2895280420780182},{"id":"https://openalex.org/keywords/engineering","display_name":"Engineering","score":0.11966767907142639}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7683557271957397},{"id":"https://openalex.org/C168065819","wikidata":"https://www.wikidata.org/wiki/Q845566","display_name":"Debugging","level":2,"score":0.7677529454231262},{"id":"https://openalex.org/C2781198647","wikidata":"https://www.wikidata.org/wiki/Q1633673","display_name":"Datapath","level":2,"score":0.757684051990509},{"id":"https://openalex.org/C1009929","wikidata":"https://www.wikidata.org/wiki/Q179550","display_name":"Software bug","level":3,"score":0.5384504795074463},{"id":"https://openalex.org/C75291252","wikidata":"https://www.wikidata.org/wiki/Q1315756","display_name":"TRACE (psycholinguistics)","level":2,"score":0.5262921452522278},{"id":"https://openalex.org/C2778755073","wikidata":"https://www.wikidata.org/wiki/Q10858537","display_name":"Scale (ratio)","level":2,"score":0.5172622799873352},{"id":"https://openalex.org/C2777904410","wikidata":"https://www.wikidata.org/wiki/Q7397","display_name":"Software","level":2,"score":0.48902472853660583},{"id":"https://openalex.org/C63540848","wikidata":"https://www.wikidata.org/wiki/Q3140932","display_name":"Fault tolerance","level":2,"score":0.4410867989063263},{"id":"https://openalex.org/C149635348","wikidata":"https://www.wikidata.org/wiki/Q193040","display_name":"Embedded system","level":1,"score":0.44107741117477417},{"id":"https://openalex.org/C96147967","wikidata":"https://www.wikidata.org/wiki/Q190686","display_name":"Subroutine","level":2,"score":0.4325621426105499},{"id":"https://openalex.org/C200601418","wikidata":"https://www.wikidata.org/wiki/Q2193887","display_name":"Reliability engineering","level":1,"score":0.3610503077507019},{"id":"https://openalex.org/C79403827","wikidata":"https://www.wikidata.org/wiki/Q3988","display_name":"Real-time computing","level":1,"score":0.323733925819397},{"id":"https://openalex.org/C120314980","wikidata":"https://www.wikidata.org/wiki/Q180634","display_name":"Distributed computing","level":1,"score":0.30939608812332153},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.2895280420780182},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.11966767907142639},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/iolts59296.2023.10224872","is_oa":true,"landing_page_url":"https://doi.org/10.1109/iolts59296.2023.10224872","pdf_url":"https://ieeexplore.ieee.org/ielx7/10224820/10224858/10224872.pdf","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2023 IEEE 29th International Symposium on On-Line Testing and Robust System Design (IOLTS)","raw_type":"proceedings-article"}],"best_oa_location":{"id":"doi:10.1109/iolts59296.2023.10224872","is_oa":true,"landing_page_url":"https://doi.org/10.1109/iolts59296.2023.10224872","pdf_url":"https://ieeexplore.ieee.org/ielx7/10224820/10224858/10224872.pdf","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2023 IEEE 29th International Symposium on On-Line Testing and Robust System Design (IOLTS)","raw_type":"proceedings-article"},"sustainable_development_goals":[{"score":0.8299999833106995,"display_name":"Peace, Justice and strong institutions","id":"https://metadata.un.org/sdg/16"}],"awards":[],"funders":[],"has_content":{"pdf":true,"grobid_xml":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4386212580.pdf","grobid_xml":"https://content.openalex.org/works/W4386212580.grobid-xml"},"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W1483845062","https://openalex.org/W2390095984","https://openalex.org/W2115737686","https://openalex.org/W2162923618","https://openalex.org/W3102446781","https://openalex.org/W2617064411","https://openalex.org/W2295679071","https://openalex.org/W1964713618","https://openalex.org/W2027487876","https://openalex.org/W2135181107"],"abstract_inverted_index":{"Silent":[0,56],"data":[1,61,125],"corruptions":[2,14],"(SDC)":[3],"in":[4,35,60,92,98],"hardware":[5,130],"impact":[6],"computational":[7],"integrity":[8],"for":[9,103],"large-scale":[10,73],"applications.":[11,55],"Sources":[12],"of":[13,67,81,88,90,100],"include":[15],"datapath":[16],"dependencies,":[17],"temperature":[18],"variance,":[19],"and":[20,47,63,132],"age":[21],"among":[22],"other":[23],"silicon":[24],"factors.":[25],"These":[26],"errors":[27,42,57],"do":[28],"not":[29,128],"leave":[30],"any":[31],"record":[32],"or":[33],"trace":[34],"system":[36],"logs.":[37],"As":[38],"a":[39,78,110],"result,":[40],"silent":[41,82,124],"stay":[43],"undetected":[44],"within":[45],"workloads,":[46],"can":[48,58,64],"propagate":[49],"across":[50,86,113],"the":[51,54],"stack":[52],"to":[53],"result":[59],"loss":[62],"require":[65],"months":[66],"debug":[68],"engineering":[69],"time.":[70],"In":[71],"our":[72,93],"infrastructure,":[74],"we":[75,120],"have":[76],"run":[77],"vast":[79],"library":[80],"error":[83],"test":[84],"scenarios":[85],"hundreds":[87,99],"thousands":[89],"machines":[91],"fleet.":[94],"This":[95],"has":[96],"resulted":[97],"CPUs":[101],"detected":[102],"these":[104],"errors,":[105],"showing":[106],"that":[107,122],"SDCs":[108],"are":[109],"systemic":[111],"issue":[112],"device":[114],"generations.":[115],"Based":[116],"on":[117],"this":[118],"experience,":[119],"determine":[121],"reducing":[123],"corruption":[126],"requires":[127],"only":[129],"resiliency":[131],"production":[133],"detection":[134],"mechanisms,":[135],"but":[136],"also":[137],"robust":[138],"fault-tolerant":[139],"software":[140],"architectures.":[141]},"counts_by_year":[{"year":2025,"cited_by_count":21},{"year":2024,"cited_by_count":8},{"year":2023,"cited_by_count":3},{"year":2022,"cited_by_count":3}],"updated_date":"2026-03-13T16:22:10.518609","created_date":"2025-10-10T00:00:00"}
