{"id":"https://openalex.org/W7117768918","doi":"https://doi.org/10.1145/3773274.3774280","title":"When Timeouts Fail: Revisiting Fault Detection under Resource Stress in Edge Computing","display_name":"When Timeouts Fail: Revisiting Fault Detection under Resource Stress in Edge Computing","publication_year":2025,"publication_date":"2025-12-01","ids":{"openalex":"https://openalex.org/W7117768918","doi":"https://doi.org/10.1145/3773274.3774280"},"language":null,"primary_location":{"id":"doi:10.1145/3773274.3774280","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3773274.3774280","pdf_url":null,"source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 18th IEEE/ACM International Conference on Utility and Cloud Computing","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://doi.org/10.1145/3773274.3774280","any_repository_has_fulltext":null},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5020178550","display_name":"Maryam Pourreza","orcid":"https://orcid.org/0009-0009-6639-6339"},"institutions":[{"id":"https://openalex.org/I74973139","display_name":"Carnegie Mellon University","ror":"https://ror.org/05x2bcf33","country_code":"US","type":"education","lineage":["https://openalex.org/I74973139"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Maryam Pourreza","raw_affiliation_strings":["Carnegie Mellon University, Pittsburgh, PA, USA"],"raw_orcid":"https://orcid.org/0009-0009-6639-6339","affiliations":[{"raw_affiliation_string":"Carnegie Mellon University, Pittsburgh, PA, USA","institution_ids":["https://openalex.org/I74973139"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5071484632","display_name":"Priya Narasimhan","orcid":"https://orcid.org/0000-0001-9100-0943"},"institutions":[{"id":"https://openalex.org/I74973139","display_name":"Carnegie Mellon University","ror":"https://ror.org/05x2bcf33","country_code":"US","type":"education","lineage":["https://openalex.org/I74973139"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Priya Narasimhan","raw_affiliation_strings":["Carnegie Mellon University, Pittsburgh, PA, USA"],"raw_orcid":"https://orcid.org/0000-0001-9100-0943","affiliations":[{"raw_affiliation_string":"Carnegie Mellon University, Pittsburgh, PA, USA","institution_ids":["https://openalex.org/I74973139"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5020178550"],"corresponding_institution_ids":["https://openalex.org/I74973139"],"apc_list":null,"apc_paid":null,"fwci":1.1502,"has_fulltext":false,"cited_by_count":1,"citation_normalized_percentile":{"value":0.8627494,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":95,"max":98},"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"10"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12127","display_name":"Software System Performance and Reliability","score":0.275299996137619,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T12127","display_name":"Software System Performance and Reliability","score":0.275299996137619,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.26429998874664307,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10772","display_name":"Distributed systems and fault tolerance","score":0.19290000200271606,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/timeout","display_name":"Timeout","score":0.6743999719619751},{"id":"https://openalex.org/keywords/false-positive-paradox","display_name":"False positive paradox","score":0.5938000082969666},{"id":"https://openalex.org/keywords/spurious-relationship","display_name":"Spurious relationship","score":0.505299985408783},{"id":"https://openalex.org/keywords/enhanced-data-rates-for-gsm-evolution","display_name":"Enhanced Data Rates for GSM Evolution","score":0.4587000012397766},{"id":"https://openalex.org/keywords/edge-computing","display_name":"Edge computing","score":0.4571000039577484},{"id":"https://openalex.org/keywords/static-analysis","display_name":"Static analysis","score":0.4507000148296356},{"id":"https://openalex.org/keywords/fault-detection-and-isolation","display_name":"Fault detection and isolation","score":0.42750000953674316},{"id":"https://openalex.org/keywords/resource","display_name":"Resource (disambiguation)","score":0.35929998755455017},{"id":"https://openalex.org/keywords/system-monitoring","display_name":"System monitoring","score":0.3336000144481659}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.758400022983551},{"id":"https://openalex.org/C31691690","wikidata":"https://www.wikidata.org/wiki/Q1753979","display_name":"Timeout","level":2,"score":0.6743999719619751},{"id":"https://openalex.org/C64869954","wikidata":"https://www.wikidata.org/wiki/Q1859747","display_name":"False positive paradox","level":2,"score":0.5938000082969666},{"id":"https://openalex.org/C97256817","wikidata":"https://www.wikidata.org/wiki/Q1462316","display_name":"Spurious relationship","level":2,"score":0.505299985408783},{"id":"https://openalex.org/C162307627","wikidata":"https://www.wikidata.org/wiki/Q204833","display_name":"Enhanced Data Rates for GSM Evolution","level":2,"score":0.4587000012397766},{"id":"https://openalex.org/C2778456923","wikidata":"https://www.wikidata.org/wiki/Q5337692","display_name":"Edge computing","level":3,"score":0.4571000039577484},{"id":"https://openalex.org/C97686452","wikidata":"https://www.wikidata.org/wiki/Q7604153","display_name":"Static analysis","level":2,"score":0.4507000148296356},{"id":"https://openalex.org/C79403827","wikidata":"https://www.wikidata.org/wiki/Q3988","display_name":"Real-time computing","level":1,"score":0.4392000138759613},{"id":"https://openalex.org/C152745839","wikidata":"https://www.wikidata.org/wiki/Q5438153","display_name":"Fault detection and isolation","level":3,"score":0.42750000953674316},{"id":"https://openalex.org/C149635348","wikidata":"https://www.wikidata.org/wiki/Q193040","display_name":"Embedded system","level":1,"score":0.4018999934196472},{"id":"https://openalex.org/C206345919","wikidata":"https://www.wikidata.org/wiki/Q20380951","display_name":"Resource (disambiguation)","level":2,"score":0.35929998755455017},{"id":"https://openalex.org/C200749887","wikidata":"https://www.wikidata.org/wiki/Q1165574","display_name":"System monitoring","level":2,"score":0.3336000144481659},{"id":"https://openalex.org/C74912251","wikidata":"https://www.wikidata.org/wiki/Q6815727","display_name":"Memory footprint","level":2,"score":0.33329999446868896},{"id":"https://openalex.org/C115537543","wikidata":"https://www.wikidata.org/wiki/Q165596","display_name":"Cache","level":2,"score":0.33239999413490295},{"id":"https://openalex.org/C165838908","wikidata":"https://www.wikidata.org/wiki/Q736777","display_name":"Calibration","level":2,"score":0.328900009393692},{"id":"https://openalex.org/C120314980","wikidata":"https://www.wikidata.org/wiki/Q180634","display_name":"Distributed computing","level":1,"score":0.3188999891281128},{"id":"https://openalex.org/C2779086471","wikidata":"https://www.wikidata.org/wiki/Q2051704","display_name":"False memory","level":3,"score":0.3050000071525574},{"id":"https://openalex.org/C2779628075","wikidata":"https://www.wikidata.org/wiki/Q1253258","display_name":"Downgrade","level":2,"score":0.2985999882221222},{"id":"https://openalex.org/C165021410","wikidata":"https://www.wikidata.org/wiki/Q55564","display_name":"Lossy compression","level":2,"score":0.2858999967575073},{"id":"https://openalex.org/C82876162","wikidata":"https://www.wikidata.org/wiki/Q17096504","display_name":"Latency (audio)","level":2,"score":0.2827000021934509},{"id":"https://openalex.org/C43214815","wikidata":"https://www.wikidata.org/wiki/Q7310987","display_name":"Reliability (semiconductor)","level":3,"score":0.2745000123977661},{"id":"https://openalex.org/C2780799671","wikidata":"https://www.wikidata.org/wiki/Q17087362","display_name":"Transient (computer programming)","level":2,"score":0.27309998869895935},{"id":"https://openalex.org/C103088060","wikidata":"https://www.wikidata.org/wiki/Q1062839","display_name":"Error detection and correction","level":2,"score":0.2727999985218048},{"id":"https://openalex.org/C147494362","wikidata":"https://www.wikidata.org/wiki/Q2078905","display_name":"Troubleshooting","level":2,"score":0.27149999141693115},{"id":"https://openalex.org/C147224247","wikidata":"https://www.wikidata.org/wiki/Q885373","display_name":"Bloom filter","level":2,"score":0.26910001039505005},{"id":"https://openalex.org/C7366592","wikidata":"https://www.wikidata.org/wiki/Q1255620","display_name":"Dram","level":2,"score":0.26420000195503235},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.2612000107765198},{"id":"https://openalex.org/C138236772","wikidata":"https://www.wikidata.org/wiki/Q25098575","display_name":"Edge device","level":3,"score":0.2590999901294708},{"id":"https://openalex.org/C175551986","wikidata":"https://www.wikidata.org/wiki/Q47089","display_name":"Fault (geology)","level":2,"score":0.2572999894618988},{"id":"https://openalex.org/C7515471","wikidata":"https://www.wikidata.org/wiki/Q1936882","display_name":"Stress testing (software)","level":2,"score":0.25189998745918274}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3773274.3774280","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3773274.3774280","pdf_url":null,"source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 18th IEEE/ACM International Conference on Utility and Cloud Computing","raw_type":"proceedings-article"}],"best_oa_location":{"id":"doi:10.1145/3773274.3774280","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3773274.3774280","pdf_url":null,"source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 18th IEEE/ACM International Conference on Utility and Cloud Computing","raw_type":"proceedings-article"},"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/8","score":0.4510745406150818,"display_name":"Decent work and economic growth"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":15,"referenced_works":["https://openalex.org/W2416799949","https://openalex.org/W2568772110","https://openalex.org/W2804035958","https://openalex.org/W2986404693","https://openalex.org/W2987893823","https://openalex.org/W3000041910","https://openalex.org/W3164950055","https://openalex.org/W4292969716","https://openalex.org/W4293232442","https://openalex.org/W4367044888","https://openalex.org/W4386352759","https://openalex.org/W4390189386","https://openalex.org/W4391307066","https://openalex.org/W4391542510","https://openalex.org/W4413320419"],"related_works":[],"abstract_inverted_index":{"Timeout-based":[0],"failure":[1],"detection":[2,45,139,186],"is":[3,23,110],"widely":[4],"used":[5],"in":[6,112],"distributed":[7],"systems,":[8],"yet":[9],"its":[10],"behavior":[11],"under":[12,46,59,91],"the":[13,51,188],"tight":[14,115],"resource":[15,61,198],"budgets":[16],"and":[17,72,83,88,163,173,197,206],"latency-sensitive":[18],"workloads":[19],"of":[20,56,140],"edge":[21,34,65],"environments":[22],"poorly":[24],"understood.":[25],"Unlike":[26],"datacenters,":[27],"where":[28],"static":[29,57,108],"timeout":[30,167],"defaults":[31],"often":[32,169],"suffice,":[33],"deployments":[35],"risk":[36],"spurious":[37],"failovers":[38],"during":[39],"transient":[40],"slowdowns":[41],"or":[42],"dangerously":[43],"delayed":[44],"genuine":[47],"faults.":[48],"We":[49,143],"present":[50],"first":[52],"systematic":[53],"empirical":[54,180],"study":[55,201],"timeouts":[58],"controlled":[60],"stress":[62],"on":[63,155,160],"heterogeneous":[64],"platforms.":[66],"Using":[67],"a":[68,123,207],"Raspberry":[69,156],"Pi":[70,157],"4B":[71],"Jetson":[73,161],"Nano":[74],"testbed,":[75],"we":[76],"evaluate":[77],"six":[78],"representative":[79],"workloads,":[80],"including":[81],"compute-":[82],"memory-bound":[84],"benchmarks,":[85],"multimedia":[86],"processing,":[87],"CPU/GPU":[89],"inference,":[90],"five":[92],"stressors":[93],"(CPU":[94],"overload,":[95],"memory":[96,164,172],"contention,":[97],"disk":[98],"I/O,":[99],"cache":[100],"thrashing,":[101],"page":[102],"faults).":[103],"Our":[104],"results":[105],"show":[106,145],"that":[107,146,211],"calibration":[109,125],"brittle:":[111],"fault-free":[113],"runs,":[114],"thresholds":[116,137],"yield":[117],"up":[118],"to":[119],"30%":[120],"false":[121,129,195],"positives;":[122],"moderate":[124],"reduces":[126],"these":[127],"no-fault":[128],"positives":[130],"by":[131,190],"\u223c":[132],"40\u201360%,":[133],"while":[134],"conservative":[135],"(long)":[136],"suppress":[138],"real":[141],"slowdowns.":[142],"further":[144],"workload\u2013device":[147],"interplay":[148],"inverts":[149],"common":[150],"assumptions":[151],"(e.g.,":[152],"FFT":[153],"fastest":[154],"but":[158],"slowest":[159],"Nano),":[162],"contention":[165],"dominates":[166],"anomalies,":[168],"with":[170,214],"elevated":[171],"thermal":[174],"signals.":[175],"These":[176],"findings":[177],"provide":[178],"an":[179],"foundation":[181],"for":[182,209],"adaptive,":[183],"resource-aware":[184],"fault":[185],"at":[187],"edge:":[189],"quantifying":[191],"trade-offs":[192],"among":[193],"sensitivity,":[194],"positives,":[196],"overheads,":[199],"our":[200],"offers":[202],"practical":[203],"configuration":[204],"guidance":[205],"baseline":[208],"detectors":[210],"combine":[212],"latency":[213],"lightweight":[215],"system":[216],"metrics.":[217]},"counts_by_year":[{"year":2026,"cited_by_count":1}],"updated_date":"2026-06-03T09:05:47.796612","created_date":"2025-12-31T00:00:00"}
