{"id":"https://openalex.org/W2973130830","doi":"https://doi.org/10.1109/tnet.2019.2938228","title":"Understanding the Limits of Passive Realtime Datacenter Fault Detection and Localization","display_name":"Understanding the Limits of Passive Realtime Datacenter Fault Detection and Localization","publication_year":2019,"publication_date":"2019-09-13","ids":{"openalex":"https://openalex.org/W2973130830","doi":"https://doi.org/10.1109/tnet.2019.2938228","mag":"2973130830"},"language":"en","primary_location":{"id":"doi:10.1109/tnet.2019.2938228","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tnet.2019.2938228","pdf_url":null,"source":{"id":"https://openalex.org/S62238642","display_name":"IEEE/ACM Transactions on Networking","issn_l":"1063-6692","issn":["1063-6692","1558-2566"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE/ACM Transactions on Networking","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5101936209","display_name":"Arjun Roy","orcid":"https://orcid.org/0000-0003-2864-9111"},"institutions":[{"id":"https://openalex.org/I36258959","display_name":"University of California, San Diego","ror":"https://ror.org/0168r3w48","country_code":"US","type":"education","lineage":["https://openalex.org/I36258959"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Arjun Roy","raw_affiliation_strings":["Department of Computer Science and Engineering, University of California at San Diego, La Jolla, CA, USA"],"affiliations":[{"raw_affiliation_string":"Department of Computer Science and Engineering, University of California at San Diego, La Jolla, CA, USA","institution_ids":["https://openalex.org/I36258959"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5079570779","display_name":"Rajdeep Das","orcid":"https://orcid.org/0000-0003-0513-4967"},"institutions":[{"id":"https://openalex.org/I36258959","display_name":"University of California, San Diego","ror":"https://ror.org/0168r3w48","country_code":"US","type":"education","lineage":["https://openalex.org/I36258959"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Rajdeep Das","raw_affiliation_strings":["Department of Computer Science and Engineering, University of California at San Diego, La Jolla, CA, USA"],"affiliations":[{"raw_affiliation_string":"Department of Computer Science and Engineering, University of California at San Diego, La Jolla, CA, USA","institution_ids":["https://openalex.org/I36258959"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5072231297","display_name":"Hongyi Zeng","orcid":"https://orcid.org/0000-0002-4205-4678"},"institutions":[{"id":"https://openalex.org/I4210099336","display_name":"Menlo School","ror":"https://ror.org/01240pn49","country_code":"US","type":"education","lineage":["https://openalex.org/I4210099336"]},{"id":"https://openalex.org/I4210114444","display_name":"Meta (United States)","ror":"https://ror.org/01zbnvs85","country_code":"US","type":"company","lineage":["https://openalex.org/I4210114444"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Hongyi Zeng","raw_affiliation_strings":["Facebook Inc., Menlo Park, CA, USA"],"affiliations":[{"raw_affiliation_string":"Facebook Inc., Menlo Park, CA, USA","institution_ids":["https://openalex.org/I4210114444","https://openalex.org/I4210099336"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5044615793","display_name":"Jasmeet Bagga","orcid":null},"institutions":[{"id":"https://openalex.org/I4210099336","display_name":"Menlo School","ror":"https://ror.org/01240pn49","country_code":"US","type":"education","lineage":["https://openalex.org/I4210099336"]},{"id":"https://openalex.org/I4210114444","display_name":"Meta (United States)","ror":"https://ror.org/01zbnvs85","country_code":"US","type":"company","lineage":["https://openalex.org/I4210114444"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Jasmeet Bagga","raw_affiliation_strings":["Facebook Inc., Menlo Park, CA, USA"],"affiliations":[{"raw_affiliation_string":"Facebook Inc., Menlo Park, CA, USA","institution_ids":["https://openalex.org/I4210114444","https://openalex.org/I4210099336"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5029890607","display_name":"Alex C. Snoeren","orcid":"https://orcid.org/0000-0001-5679-3888"},"institutions":[{"id":"https://openalex.org/I36258959","display_name":"University of California, San Diego","ror":"https://ror.org/0168r3w48","country_code":"US","type":"education","lineage":["https://openalex.org/I36258959"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Alex C. Snoeren","raw_affiliation_strings":["Department of Computer Science and Engineering, University of California at San Diego, La Jolla, CA, USA"],"affiliations":[{"raw_affiliation_string":"Department of Computer Science and Engineering, University of California at San Diego, La Jolla, CA, USA","institution_ids":["https://openalex.org/I36258959"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5101936209"],"corresponding_institution_ids":["https://openalex.org/I36258959"],"apc_list":null,"apc_paid":null,"fwci":0.8842,"has_fulltext":false,"cited_by_count":9,"citation_normalized_percentile":{"value":0.7699104,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":96},"biblio":{"volume":"27","issue":"5","first_page":"2001","last_page":"2014"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12127","display_name":"Software System Performance and Reliability","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T12127","display_name":"Software System Performance and Reliability","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10714","display_name":"Software-Defined Networks and 5G","score":0.9988999962806702,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10101","display_name":"Cloud Computing and Resource Management","score":0.9922000169754028,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8514396548271179},{"id":"https://openalex.org/keywords/router","display_name":"Router","score":0.7679566144943237},{"id":"https://openalex.org/keywords/network-packet","display_name":"Network packet","score":0.6343639492988586},{"id":"https://openalex.org/keywords/host","display_name":"Host (biology)","score":0.6209345459938049},{"id":"https://openalex.org/keywords/reliability","display_name":"Reliability (semiconductor)","score":0.6026769280433655},{"id":"https://openalex.org/keywords/real-time-computing","display_name":"Real-time computing","score":0.5228031873703003},{"id":"https://openalex.org/keywords/process","display_name":"Process (computing)","score":0.48625075817108154},{"id":"https://openalex.org/keywords/outlier","display_name":"Outlier","score":0.4606013894081116},{"id":"https://openalex.org/keywords/fault-detection-and-isolation","display_name":"Fault detection and isolation","score":0.4535641372203827},{"id":"https://openalex.org/keywords/fault","display_name":"Fault (geology)","score":0.4475279748439789},{"id":"https://openalex.org/keywords/computer-network","display_name":"Computer network","score":0.40052562952041626},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.1788247525691986}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8514396548271179},{"id":"https://openalex.org/C2775896111","wikidata":"https://www.wikidata.org/wiki/Q642560","display_name":"Router","level":2,"score":0.7679566144943237},{"id":"https://openalex.org/C158379750","wikidata":"https://www.wikidata.org/wiki/Q214111","display_name":"Network packet","level":2,"score":0.6343639492988586},{"id":"https://openalex.org/C126831891","wikidata":"https://www.wikidata.org/wiki/Q221673","display_name":"Host (biology)","level":2,"score":0.6209345459938049},{"id":"https://openalex.org/C43214815","wikidata":"https://www.wikidata.org/wiki/Q7310987","display_name":"Reliability (semiconductor)","level":3,"score":0.6026769280433655},{"id":"https://openalex.org/C79403827","wikidata":"https://www.wikidata.org/wiki/Q3988","display_name":"Real-time computing","level":1,"score":0.5228031873703003},{"id":"https://openalex.org/C98045186","wikidata":"https://www.wikidata.org/wiki/Q205663","display_name":"Process (computing)","level":2,"score":0.48625075817108154},{"id":"https://openalex.org/C79337645","wikidata":"https://www.wikidata.org/wiki/Q779824","display_name":"Outlier","level":2,"score":0.4606013894081116},{"id":"https://openalex.org/C152745839","wikidata":"https://www.wikidata.org/wiki/Q5438153","display_name":"Fault detection and isolation","level":3,"score":0.4535641372203827},{"id":"https://openalex.org/C175551986","wikidata":"https://www.wikidata.org/wiki/Q47089","display_name":"Fault (geology)","level":2,"score":0.4475279748439789},{"id":"https://openalex.org/C31258907","wikidata":"https://www.wikidata.org/wiki/Q1301371","display_name":"Computer network","level":1,"score":0.40052562952041626},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.1788247525691986},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0},{"id":"https://openalex.org/C18903297","wikidata":"https://www.wikidata.org/wiki/Q7150","display_name":"Ecology","level":1,"score":0.0},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0},{"id":"https://openalex.org/C165205528","wikidata":"https://www.wikidata.org/wiki/Q83371","display_name":"Seismology","level":1,"score":0.0},{"id":"https://openalex.org/C127313418","wikidata":"https://www.wikidata.org/wiki/Q1069","display_name":"Geology","level":0,"score":0.0},{"id":"https://openalex.org/C163258240","wikidata":"https://www.wikidata.org/wiki/Q25342","display_name":"Power (physics)","level":2,"score":0.0},{"id":"https://openalex.org/C172707124","wikidata":"https://www.wikidata.org/wiki/Q423488","display_name":"Actuator","level":2,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/tnet.2019.2938228","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tnet.2019.2938228","pdf_url":null,"source":{"id":"https://openalex.org/S62238642","display_name":"IEEE/ACM Transactions on Networking","issn_l":"1063-6692","issn":["1063-6692","1558-2566"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE/ACM Transactions on Networking","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G8064500458","display_name":null,"funder_award_id":"CNS-1422240","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"},{"id":"https://openalex.org/G8746514431","display_name":null,"funder_award_id":"CNS-1564185","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"}],"funders":[{"id":"https://openalex.org/F4320306076","display_name":"National Science Foundation","ror":"https://ror.org/021nxhr62"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":47,"referenced_works":["https://openalex.org/W31020079","https://openalex.org/W1408671314","https://openalex.org/W1476041289","https://openalex.org/W1581005283","https://openalex.org/W1987034518","https://openalex.org/W1988150362","https://openalex.org/W1990249073","https://openalex.org/W1998471240","https://openalex.org/W2016029184","https://openalex.org/W2035363137","https://openalex.org/W2085796834","https://openalex.org/W2098081907","https://openalex.org/W2099086552","https://openalex.org/W2099657323","https://openalex.org/W2100830825","https://openalex.org/W2100895597","https://openalex.org/W2103110737","https://openalex.org/W2106263082","https://openalex.org/W2107994908","https://openalex.org/W2114501453","https://openalex.org/W2115303222","https://openalex.org/W2123439262","https://openalex.org/W2124165698","https://openalex.org/W2127577941","https://openalex.org/W2130531694","https://openalex.org/W2143522309","https://openalex.org/W2144159136","https://openalex.org/W2146012756","https://openalex.org/W2165202235","https://openalex.org/W2167025919","https://openalex.org/W2173213060","https://openalex.org/W2235236736","https://openalex.org/W2341900579","https://openalex.org/W2502136739","https://openalex.org/W2604906639","https://openalex.org/W2964173183","https://openalex.org/W4238465620","https://openalex.org/W4238860748","https://openalex.org/W4242170712","https://openalex.org/W4285719527","https://openalex.org/W6601222128","https://openalex.org/W6628268946","https://openalex.org/W6659235122","https://openalex.org/W6689789221","https://openalex.org/W6704552889","https://openalex.org/W6736269595","https://openalex.org/W6748333624"],"related_works":["https://openalex.org/W2122026593","https://openalex.org/W1588358165","https://openalex.org/W4237683758","https://openalex.org/W2582203024","https://openalex.org/W2370711413","https://openalex.org/W2375932043","https://openalex.org/W2052038519","https://openalex.org/W2841075164","https://openalex.org/W1980506749","https://openalex.org/W2375594474"],"abstract_inverted_index":{"Datacenters":[0],"are":[1,26,39],"characterized":[2],"by":[3],"large":[4],"scale,":[5],"stringent":[6],"reliability":[7],"requirements,":[8],"and":[9,84,110,118],"significant":[10,29],"application":[11],"diversity.":[12],"However,":[13],"the":[14,80],"realities":[15],"of":[16,31,53,82,141],"employing":[17],"hardware":[18],"with":[19,106],"non-zero":[20],"failure":[21],"rates":[22],"mean":[23],"that":[24,33],"datacenters":[25],"subject":[27],"to":[28,71,78,94,115],"numbers":[30],"failures":[32,38],"can":[34,45,65],"impact":[35],"performance.":[36],"Moreover,":[37],"not":[40],"always":[41],"obvious;":[42],"network":[43,108],"components":[44],"fail":[46],"partially,":[47],"dropping":[48],"or":[49,62],"delaying":[50],"only":[51],"subsets":[52],"packets.":[54],"Thus,":[55],"traditional":[56],"fault":[57],"detection":[58],"techniques":[59,114],"involving":[60],"end-host":[61,91,102],"router-based":[63],"statistics":[64],"fall":[66],"short":[67],"in":[68,128],"their":[69],"ability":[70],"identify":[72,116],"these":[73],"errors.":[74],"We":[75,124],"describe":[76],"how":[77],"expedite":[79],"process":[81],"detecting":[83],"localizing":[85],"partial":[86],"datacenter":[87,96],"faults":[88],"using":[89],"an":[90],"method":[92],"generalizable":[93],"most":[95],"applications.":[97],"In":[98],"particular,":[99],"we":[100],"correlate":[101],"transport-layer":[103],"flow":[104],"metrics":[105],"per-flow":[107],"paths":[109],"apply":[111],"statistical":[112],"analysis":[113],"outliers":[117],"localize":[119],"faulty":[120],"links":[121],"and/or":[122],"switches.":[123],"evaluate":[125],"our":[126],"approach":[127],"a":[129,139],"production":[130],"Facebook":[131],"front-end":[132],"datacenter,":[133],"focusing":[134],"on":[135],"its":[136],"effectiveness":[137],"across":[138],"range":[140],"traffic":[142],"patterns.":[143]},"counts_by_year":[{"year":2025,"cited_by_count":2},{"year":2024,"cited_by_count":1},{"year":2023,"cited_by_count":1},{"year":2022,"cited_by_count":2},{"year":2021,"cited_by_count":2},{"year":2020,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
