{"id":"https://openalex.org/W2744654859","doi":"https://doi.org/10.1109/tpds.2017.2735971","title":"Toward General Software Level Silent Data Corruption Detection for Parallel Applications","display_name":"Toward General Software Level Silent Data Corruption Detection for Parallel Applications","publication_year":2017,"publication_date":"2017-08-04","ids":{"openalex":"https://openalex.org/W2744654859","doi":"https://doi.org/10.1109/tpds.2017.2735971","mag":"2744654859"},"language":"en","primary_location":{"id":"doi:10.1109/tpds.2017.2735971","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tpds.2017.2735971","pdf_url":null,"source":{"id":"https://openalex.org/S97130795","display_name":"IEEE Transactions on Parallel and Distributed Systems","issn_l":"1045-9219","issn":["1045-9219","1558-2183","2161-9883"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Parallel and Distributed Systems","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://www.osti.gov/biblio/1413980","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5044563928","display_name":"Eduardo Berrocal","orcid":"https://orcid.org/0000-0003-0570-7833"},"institutions":[{"id":"https://openalex.org/I180949307","display_name":"Illinois Institute of Technology","ror":"https://ror.org/037t3ry66","country_code":"US","type":"education","lineage":["https://openalex.org/I180949307"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Eduardo Berrocal","raw_affiliation_strings":["Department of Computer Science, Illinois Institute of Technology, Chicago, IL"],"affiliations":[{"raw_affiliation_string":"Department of Computer Science, Illinois Institute of Technology, Chicago, IL","institution_ids":["https://openalex.org/I180949307"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5013543252","display_name":"Leonardo Bautista-Gomez","orcid":"https://orcid.org/0000-0002-0814-5779"},"institutions":[{"id":"https://openalex.org/I2799803557","display_name":"Barcelona Supercomputing Center","ror":"https://ror.org/05sd8tv96","country_code":"ES","type":"facility","lineage":["https://openalex.org/I2799803557","https://openalex.org/I9617848"]},{"id":"https://openalex.org/I9617848","display_name":"Universitat Polit\u00e8cnica de Catalunya","ror":"https://ror.org/03mb6wj31","country_code":"ES","type":"education","lineage":["https://openalex.org/I9617848"]}],"countries":["ES"],"is_corresponding":false,"raw_author_name":"Leonardo Bautista-Gomez","raw_affiliation_strings":["Barcelona Supercomputing Center, Barcelona, Spain"],"affiliations":[{"raw_affiliation_string":"Barcelona Supercomputing Center, Barcelona, Spain","institution_ids":["https://openalex.org/I9617848","https://openalex.org/I2799803557"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5103124363","display_name":"Sheng Di","orcid":"https://orcid.org/0000-0002-9935-5674"},"institutions":[{"id":"https://openalex.org/I1282105669","display_name":"Argonne National Laboratory","ror":"https://ror.org/05gvnxz63","country_code":"US","type":"facility","lineage":["https://openalex.org/I1282105669","https://openalex.org/I1330989302","https://openalex.org/I39565521","https://openalex.org/I40347166"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Sheng Di","raw_affiliation_strings":["Argonne National Laboratory, Lemont, IL"],"affiliations":[{"raw_affiliation_string":"Argonne National Laboratory, Lemont, IL","institution_ids":["https://openalex.org/I1282105669"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5060400468","display_name":"Zhiling Lan","orcid":"https://orcid.org/0000-0002-1047-8724"},"institutions":[{"id":"https://openalex.org/I180949307","display_name":"Illinois Institute of Technology","ror":"https://ror.org/037t3ry66","country_code":"US","type":"education","lineage":["https://openalex.org/I180949307"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Zhiling Lan","raw_affiliation_strings":["Department of Computer Science, Illinois Institute of Technology, Chicago, IL"],"affiliations":[{"raw_affiliation_string":"Department of Computer Science, Illinois Institute of Technology, Chicago, IL","institution_ids":["https://openalex.org/I180949307"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5046613458","display_name":"Franck Cappello","orcid":"https://orcid.org/0000-0002-7890-3934"},"institutions":[{"id":"https://openalex.org/I1282105669","display_name":"Argonne National Laboratory","ror":"https://ror.org/05gvnxz63","country_code":"US","type":"facility","lineage":["https://openalex.org/I1282105669","https://openalex.org/I1330989302","https://openalex.org/I39565521","https://openalex.org/I40347166"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Franck Cappello","raw_affiliation_strings":["Argonne National Laboratory, Lemont, IL"],"affiliations":[{"raw_affiliation_string":"Argonne National Laboratory, Lemont, IL","institution_ids":["https://openalex.org/I1282105669"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5044563928"],"corresponding_institution_ids":["https://openalex.org/I180949307"],"apc_list":null,"apc_paid":null,"fwci":1.7544,"has_fulltext":false,"cited_by_count":17,"citation_normalized_percentile":{"value":0.87067963,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":99},"biblio":{"volume":"28","issue":"12","first_page":"3642","last_page":"3655"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11181","display_name":"Advanced Data Storage Technologies","score":0.9987999796867371,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11181","display_name":"Advanced Data Storage Technologies","score":0.9987999796867371,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11424","display_name":"Security and Verification in Computing","score":0.998199999332428,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11005","display_name":"Radiation Effects in Electronics","score":0.9976000189781189,"subfield":{"id":"https://openalex.org/subfields/2208","display_name":"Electrical and Electronic Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8753588199615479},{"id":"https://openalex.org/keywords/overhead","display_name":"Overhead (engineering)","score":0.7584798336029053},{"id":"https://openalex.org/keywords/replicate","display_name":"Replicate","score":0.6867706775665283},{"id":"https://openalex.org/keywords/replication","display_name":"Replication (statistics)","score":0.6273525357246399},{"id":"https://openalex.org/keywords/distributed-computing","display_name":"Distributed computing","score":0.47753965854644775},{"id":"https://openalex.org/keywords/detector","display_name":"Detector","score":0.4626840651035309},{"id":"https://openalex.org/keywords/software","display_name":"Software","score":0.43821391463279724},{"id":"https://openalex.org/keywords/computer-engineering","display_name":"Computer engineering","score":0.4008164703845978},{"id":"https://openalex.org/keywords/operating-system","display_name":"Operating system","score":0.1348298192024231}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8753588199615479},{"id":"https://openalex.org/C2779960059","wikidata":"https://www.wikidata.org/wiki/Q7113681","display_name":"Overhead (engineering)","level":2,"score":0.7584798336029053},{"id":"https://openalex.org/C2781162219","wikidata":"https://www.wikidata.org/wiki/Q26250693","display_name":"Replicate","level":2,"score":0.6867706775665283},{"id":"https://openalex.org/C12590798","wikidata":"https://www.wikidata.org/wiki/Q3933199","display_name":"Replication (statistics)","level":2,"score":0.6273525357246399},{"id":"https://openalex.org/C120314980","wikidata":"https://www.wikidata.org/wiki/Q180634","display_name":"Distributed computing","level":1,"score":0.47753965854644775},{"id":"https://openalex.org/C94915269","wikidata":"https://www.wikidata.org/wiki/Q1834857","display_name":"Detector","level":2,"score":0.4626840651035309},{"id":"https://openalex.org/C2777904410","wikidata":"https://www.wikidata.org/wiki/Q7397","display_name":"Software","level":2,"score":0.43821391463279724},{"id":"https://openalex.org/C113775141","wikidata":"https://www.wikidata.org/wiki/Q428691","display_name":"Computer engineering","level":1,"score":0.4008164703845978},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.1348298192024231},{"id":"https://openalex.org/C105795698","wikidata":"https://www.wikidata.org/wiki/Q12483","display_name":"Statistics","level":1,"score":0.0},{"id":"https://openalex.org/C76155785","wikidata":"https://www.wikidata.org/wiki/Q418","display_name":"Telecommunications","level":1,"score":0.0},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1109/tpds.2017.2735971","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tpds.2017.2735971","pdf_url":null,"source":{"id":"https://openalex.org/S97130795","display_name":"IEEE Transactions on Parallel and Distributed Systems","issn_l":"1045-9219","issn":["1045-9219","1558-2183","2161-9883"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Parallel and Distributed Systems","raw_type":"journal-article"},{"id":"pmh:oai:osti.gov:1413980","is_oa":true,"landing_page_url":"https://www.osti.gov/biblio/1413980","pdf_url":null,"source":{"id":"https://openalex.org/S4306402487","display_name":"OSTI OAI (U.S. Department of Energy Office of Scientific and Technical Information)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I139351228","host_organization_name":"Office of Scientific and Technical Information","host_organization_lineage":["https://openalex.org/I139351228"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":null}],"best_oa_location":{"id":"pmh:oai:osti.gov:1413980","is_oa":true,"landing_page_url":"https://www.osti.gov/biblio/1413980","pdf_url":null,"source":{"id":"https://openalex.org/S4306402487","display_name":"OSTI OAI (U.S. Department of Energy Office of Scientific and Technical Information)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I139351228","host_organization_name":"Office of Scientific and Technical Information","host_organization_lineage":["https://openalex.org/I139351228"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":null},"sustainable_development_goals":[{"display_name":"Peace, Justice and strong institutions","score":0.8199999928474426,"id":"https://metadata.un.org/sdg/16"}],"awards":[{"id":"https://openalex.org/G5344902320","display_name":null,"funder_award_id":"CCF-1422009","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"},{"id":"https://openalex.org/G8639584206","display_name":null,"funder_award_id":"CNS-1320125","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"}],"funders":[{"id":"https://openalex.org/F4320306076","display_name":"National Science Foundation","ror":"https://ror.org/021nxhr62"},{"id":"https://openalex.org/F4320306084","display_name":"U.S. Department of Energy","ror":"https://ror.org/01bj3aw27"},{"id":"https://openalex.org/F4320313813","display_name":"Illinois Institute of Technology","ror":"https://ror.org/037t3ry66"},{"id":"https://openalex.org/F4320320883","display_name":"Agence Nationale de la Recherche","ror":"https://ror.org/00rbzpz17"},{"id":"https://openalex.org/F4320332359","display_name":"Office of Science","ror":"https://ror.org/00mmn6b08"},{"id":"https://openalex.org/F4320337392","display_name":"Division of Electrical, Communications and Cyber Systems","ror":"https://ror.org/01krpsy48"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":41,"referenced_works":["https://openalex.org/W12380913","https://openalex.org/W79276452","https://openalex.org/W1526700165","https://openalex.org/W1898823215","https://openalex.org/W1905756341","https://openalex.org/W1959635528","https://openalex.org/W1978574709","https://openalex.org/W1986905947","https://openalex.org/W1989331073","https://openalex.org/W1993660990","https://openalex.org/W1996561075","https://openalex.org/W2005998283","https://openalex.org/W2022749544","https://openalex.org/W2053659711","https://openalex.org/W2068790862","https://openalex.org/W2069947399","https://openalex.org/W2073945818","https://openalex.org/W2083613288","https://openalex.org/W2087387294","https://openalex.org/W2088552220","https://openalex.org/W2100568847","https://openalex.org/W2104967192","https://openalex.org/W2113036804","https://openalex.org/W2116059696","https://openalex.org/W2118033476","https://openalex.org/W2120591095","https://openalex.org/W2122177687","https://openalex.org/W2123646449","https://openalex.org/W2127930651","https://openalex.org/W2128511938","https://openalex.org/W2131175011","https://openalex.org/W2131529479","https://openalex.org/W2143137068","https://openalex.org/W2528486527","https://openalex.org/W2757708288","https://openalex.org/W4240029073","https://openalex.org/W4242361160","https://openalex.org/W4248195866","https://openalex.org/W6600518642","https://openalex.org/W6603243439","https://openalex.org/W6678359629"],"related_works":["https://openalex.org/W2053915529","https://openalex.org/W2111659373","https://openalex.org/W3013364101","https://openalex.org/W2378141881","https://openalex.org/W4230748207","https://openalex.org/W2025865172","https://openalex.org/W4388523091","https://openalex.org/W4220767635","https://openalex.org/W4221167770","https://openalex.org/W4253026353"],"abstract_inverted_index":{"Silent":[0],"data":[1,38,102],"corruption":[2,142],"(SDC)":[3],"poses":[4],"a":[5,62,132,141,147],"great":[6],"challenge":[7],"for":[8,119],"high-performance":[9],"computing":[10],"(HPC)":[11],"applications":[12,31,60,165,181],"as":[13,70],"we":[14,76,86,110,130,162],"move":[15],"to":[16,26,47,61,80],"extreme-scale":[17],"systems.":[18],"Mechanisms":[19],"have":[20,87],"been":[21],"proposed":[22],"that":[23,89,140,173,193],"are":[24,54],"able":[25],"detect":[27],"SDC":[28],"in":[29,43],"HPC":[30],"by":[32,146],"using":[33],"the":[34,37,98,106,121,138,179,190],"peculiarities":[35],"of":[36,93,101,151,194],"(more":[39],"specifically,":[40,85],"its":[41],"\u201csmoothness\u201d":[42],"time":[44],"and":[45,114,157],"space)":[46],"make":[48],"predictions.":[49],"However,":[50],"these":[51],"data-analytic":[52,123],"solutions":[53,68],"still":[55],"far":[56],"from":[57],"fully":[58],"protecting":[59],"level":[63,100],"comparable":[64],"with":[65,167,183,197],"more":[66],"expensive":[67],"such":[69],"full":[71,195],"replication.":[72],"In":[73,128,159],"this":[74,82],"work,":[75],"propose":[77,131],"partial":[78],"replication":[79],"overcome":[81],"limitation.":[83],"More":[84],"observed":[88],"not":[90],"all":[91],"processes":[92,118],"an":[94],"MPI":[95,180],"application":[96],"experience":[97],"same":[99,107],"variability":[103],"at":[104],"exactly":[105],"time.":[108],"Thus,":[109],"can":[111,177],"smartly":[112],"choose":[113],"replicate":[115],"only":[116],"those":[117],"which":[120],"lightweight":[122],"detectors":[124],"would":[125],"perform":[126],"poorly.":[127],"addition,":[129],"new":[133,175],"evaluation":[134],"method":[135],"based":[136],"on":[137,189],"probability":[139],"will":[143],"pass":[144],"unnoticed":[145],"particular":[148],"detector":[149],"(instead":[150],"just":[152],"reporting":[153],"overall":[154],"single-bit":[155],"precision":[156],"recall).":[158],"our":[160,174],"experiments,":[161],"use":[163],"four":[164],"dealing":[166],"different":[168],"explosions.":[169],"Our":[170],"results":[171],"indicate":[172],"approach":[176],"protect":[178],"analyzed":[182],"7-70":[184],"percent":[185],"less":[186],"overhead":[187],"(depending":[188],"application)":[191],"than":[192],"duplication":[196],"similar":[198],"detection":[199],"recall.":[200]},"counts_by_year":[{"year":2026,"cited_by_count":2},{"year":2024,"cited_by_count":1},{"year":2023,"cited_by_count":1},{"year":2022,"cited_by_count":1},{"year":2021,"cited_by_count":4},{"year":2020,"cited_by_count":1},{"year":2019,"cited_by_count":6},{"year":2018,"cited_by_count":1}],"updated_date":"2026-04-04T16:13:02.066488","created_date":"2025-10-10T00:00:00"}
