{"id":"https://openalex.org/W7123691289","doi":"https://doi.org/10.1145/3772052.3772243","title":"FailLite: Failure-Resilient Model Serving for Resource-Constrained Edge Environments","display_name":"FailLite: Failure-Resilient Model Serving for Resource-Constrained Edge Environments","publication_year":2025,"publication_date":"2025-11-19","ids":{"openalex":"https://openalex.org/W7123691289","doi":"https://doi.org/10.1145/3772052.3772243"},"language":null,"primary_location":{"id":"doi:10.1145/3772052.3772243","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3772052.3772243","pdf_url":null,"source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2025 ACM Symposium on Cloud Computing","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://doi.org/10.1145/3772052.3772243","any_repository_has_fulltext":null},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5100326378","display_name":"Wu Li","orcid":"https://orcid.org/0000-0001-7115-1517"},"institutions":[{"id":"https://openalex.org/I24603500","display_name":"University of Massachusetts Amherst","ror":"https://ror.org/0072zz521","country_code":"US","type":"education","lineage":["https://openalex.org/I24603500"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Li Wu","raw_affiliation_strings":["University of Massachusetts Amherst, Amherst, Massachusetts, USA"],"raw_orcid":"https://orcid.org/0000-0001-7115-1517","affiliations":[{"raw_affiliation_string":"University of Massachusetts Amherst, Amherst, Massachusetts, USA","institution_ids":["https://openalex.org/I24603500"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5090531845","display_name":"Walid A. Hanafy","orcid":"https://orcid.org/0000-0001-5765-8194"},"institutions":[{"id":"https://openalex.org/I24603500","display_name":"University of Massachusetts Amherst","ror":"https://ror.org/0072zz521","country_code":"US","type":"education","lineage":["https://openalex.org/I24603500"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Walid Hanafy","raw_affiliation_strings":["University of Massachusetts Amherst, Amherst, USA"],"raw_orcid":"https://orcid.org/0000-0001-5765-8194","affiliations":[{"raw_affiliation_string":"University of Massachusetts Amherst, Amherst, USA","institution_ids":["https://openalex.org/I24603500"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5043506947","display_name":"T. Abdelzaher","orcid":null},"institutions":[{"id":"https://openalex.org/I157725225","display_name":"University of Illinois Urbana-Champaign","ror":"https://ror.org/047426m28","country_code":"US","type":"education","lineage":["https://openalex.org/I157725225"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Tarek Abdelzaher","raw_affiliation_strings":["University of Illinois at Urbana-Champaign, Champaign, USA"],"raw_orcid":"https://orcid.org/0000-0003-3883-7220","affiliations":[{"raw_affiliation_string":"University of Illinois at Urbana-Champaign, Champaign, USA","institution_ids":["https://openalex.org/I157725225"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5059310848","display_name":"David Irwin","orcid":"https://orcid.org/0000-0003-1722-4927"},"institutions":[{"id":"https://openalex.org/I24603500","display_name":"University of Massachusetts Amherst","ror":"https://ror.org/0072zz521","country_code":"US","type":"education","lineage":["https://openalex.org/I24603500"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"David Irwin","raw_affiliation_strings":["University of Massachusetts Amherst, Amherst, USA"],"raw_orcid":"https://orcid.org/0000-0003-1722-4927","affiliations":[{"raw_affiliation_string":"University of Massachusetts Amherst, Amherst, USA","institution_ids":["https://openalex.org/I24603500"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5094363642","display_name":"Jesse Milzman","orcid":null},"institutions":[{"id":"https://openalex.org/I166416128","display_name":"DEVCOM Army Research Laboratory","ror":"https://ror.org/011hc8f90","country_code":"US","type":"government","lineage":["https://openalex.org/I1304082316","https://openalex.org/I1330347796","https://openalex.org/I166416128","https://openalex.org/I2802705668","https://openalex.org/I4210154437"]},{"id":"https://openalex.org/I4210088792","display_name":"United States Army","ror":"https://ror.org/00afsp483","country_code":"US","type":"government","lineage":["https://openalex.org/I1304082316","https://openalex.org/I1330347796","https://openalex.org/I4210088792"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Jesse Milzman","raw_affiliation_strings":["Army Research Laboratory, New York, USA"],"raw_orcid":"https://orcid.org/0000-0003-4937-8912","affiliations":[{"raw_affiliation_string":"Army Research Laboratory, New York, USA","institution_ids":["https://openalex.org/I4210088792","https://openalex.org/I166416128"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5078752355","display_name":"P. Shenoy","orcid":null},"institutions":[{"id":"https://openalex.org/I24603500","display_name":"University of Massachusetts Amherst","ror":"https://ror.org/0072zz521","country_code":"US","type":"education","lineage":["https://openalex.org/I24603500"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Prashant Shenoy","raw_affiliation_strings":["University of Massachusetts Amherst, Amherst, USA"],"raw_orcid":"https://orcid.org/0000-0002-5435-1901","affiliations":[{"raw_affiliation_string":"University of Massachusetts Amherst, Amherst, USA","institution_ids":["https://openalex.org/I24603500"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":6,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":4.7179,"has_fulltext":false,"cited_by_count":2,"citation_normalized_percentile":{"value":0.95842632,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":98,"max":99},"biblio":{"volume":null,"issue":null,"first_page":"416","last_page":"429"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10101","display_name":"Cloud Computing and Resource Management","score":0.11089999973773956,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10101","display_name":"Cloud Computing and Resource Management","score":0.11089999973773956,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10772","display_name":"Distributed systems and fault tolerance","score":0.10459999740123749,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T14347","display_name":"Big Data and Digital Economy","score":0.10000000149011612,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/failover","display_name":"Failover","score":0.9882000088691711},{"id":"https://openalex.org/keywords/testbed","display_name":"Testbed","score":0.664900004863739},{"id":"https://openalex.org/keywords/enhanced-data-rates-for-gsm-evolution","display_name":"Enhanced Data Rates for GSM Evolution","score":0.632099986076355},{"id":"https://openalex.org/keywords/baseline","display_name":"Baseline (sea)","score":0.5015000104904175},{"id":"https://openalex.org/keywords/mean-time-between-failures","display_name":"Mean time between failures","score":0.46219998598098755},{"id":"https://openalex.org/keywords/edge-device","display_name":"Edge device","score":0.4535999894142151},{"id":"https://openalex.org/keywords/survivability","display_name":"Survivability","score":0.451200008392334}],"concepts":[{"id":"https://openalex.org/C109751979","wikidata":"https://www.wikidata.org/wiki/Q998767","display_name":"Failover","level":2,"score":0.9882000088691711},{"id":"https://openalex.org/C31395832","wikidata":"https://www.wikidata.org/wiki/Q1318674","display_name":"Testbed","level":2,"score":0.664900004863739},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6527000069618225},{"id":"https://openalex.org/C162307627","wikidata":"https://www.wikidata.org/wiki/Q204833","display_name":"Enhanced Data Rates for GSM Evolution","level":2,"score":0.632099986076355},{"id":"https://openalex.org/C12725497","wikidata":"https://www.wikidata.org/wiki/Q810247","display_name":"Baseline (sea)","level":2,"score":0.5015000104904175},{"id":"https://openalex.org/C44154001","wikidata":"https://www.wikidata.org/wiki/Q754940","display_name":"Mean time between failures","level":3,"score":0.46219998598098755},{"id":"https://openalex.org/C200601418","wikidata":"https://www.wikidata.org/wiki/Q2193887","display_name":"Reliability engineering","level":1,"score":0.4611000120639801},{"id":"https://openalex.org/C138236772","wikidata":"https://www.wikidata.org/wiki/Q25098575","display_name":"Edge device","level":3,"score":0.4535999894142151},{"id":"https://openalex.org/C2781133158","wikidata":"https://www.wikidata.org/wiki/Q1088669","display_name":"Survivability","level":2,"score":0.451200008392334},{"id":"https://openalex.org/C12590798","wikidata":"https://www.wikidata.org/wiki/Q3933199","display_name":"Replication (statistics)","level":2,"score":0.43619999289512634},{"id":"https://openalex.org/C206345919","wikidata":"https://www.wikidata.org/wiki/Q20380951","display_name":"Resource (disambiguation)","level":2,"score":0.41769999265670776},{"id":"https://openalex.org/C2776214188","wikidata":"https://www.wikidata.org/wiki/Q408386","display_name":"Inference","level":2,"score":0.3853999972343445},{"id":"https://openalex.org/C65813073","wikidata":"https://www.wikidata.org/wiki/Q1622420","display_name":"High availability","level":2,"score":0.37369999289512634},{"id":"https://openalex.org/C111335779","wikidata":"https://www.wikidata.org/wiki/Q3454686","display_name":"Reduction (mathematics)","level":2,"score":0.373199999332428},{"id":"https://openalex.org/C79403827","wikidata":"https://www.wikidata.org/wiki/Q3988","display_name":"Real-time computing","level":1,"score":0.3393999934196472},{"id":"https://openalex.org/C120314980","wikidata":"https://www.wikidata.org/wiki/Q180634","display_name":"Distributed computing","level":1,"score":0.31859999895095825},{"id":"https://openalex.org/C38652104","wikidata":"https://www.wikidata.org/wiki/Q3510521","display_name":"Computer security","level":1,"score":0.3181000053882599},{"id":"https://openalex.org/C176217482","wikidata":"https://www.wikidata.org/wiki/Q860554","display_name":"Metric (unit)","level":2,"score":0.30970001220703125},{"id":"https://openalex.org/C43214815","wikidata":"https://www.wikidata.org/wiki/Q7310987","display_name":"Reliability (semiconductor)","level":3,"score":0.3050999939441681},{"id":"https://openalex.org/C31258907","wikidata":"https://www.wikidata.org/wiki/Q1301371","display_name":"Computer network","level":1,"score":0.289000004529953},{"id":"https://openalex.org/C163164238","wikidata":"https://www.wikidata.org/wiki/Q2737027","display_name":"Failure rate","level":2,"score":0.27730000019073486},{"id":"https://openalex.org/C149635348","wikidata":"https://www.wikidata.org/wiki/Q193040","display_name":"Embedded system","level":1,"score":0.25200000405311584}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3772052.3772243","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3772052.3772243","pdf_url":null,"source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2025 ACM Symposium on Cloud Computing","raw_type":"proceedings-article"}],"best_oa_location":{"id":"doi:10.1145/3772052.3772243","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3772052.3772243","pdf_url":null,"source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2025 ACM Symposium on Cloud Computing","raw_type":"proceedings-article"},"sustainable_development_goals":[{"display_name":"Decent work and economic growth","score":0.4377698004245758,"id":"https://metadata.un.org/sdg/8"}],"awards":[{"id":"https://openalex.org/G1606157890","display_name":null,"funder_award_id":"DE-EE0010143","funder_id":"https://openalex.org/F4320310514","funder_display_name":"Department of Energy and Climate Change"},{"id":"https://openalex.org/G1637379582","display_name":null,"funder_award_id":"W911NF-17-2-0196","funder_id":"https://openalex.org/F4320338295","funder_display_name":"Army Research Laboratory"},{"id":"https://openalex.org/G7664801655","display_name":null,"funder_award_id":"2213636, 2105494, 2211302, 2211888, 2325956, 23091241,19250001","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"}],"funders":[{"id":"https://openalex.org/F4320306076","display_name":"National Science Foundation","ror":"https://ror.org/021nxhr62"},{"id":"https://openalex.org/F4320310514","display_name":"Department of Energy and Climate Change","ror":"https://ror.org/019ya6433"},{"id":"https://openalex.org/F4320338295","display_name":"Army Research Laboratory","ror":"https://ror.org/011hc8f90"}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":31,"referenced_works":["https://openalex.org/W2135099885","https://openalex.org/W2194775991","https://openalex.org/W2568772110","https://openalex.org/W2767260595","https://openalex.org/W2910612215","https://openalex.org/W2920700677","https://openalex.org/W2941938531","https://openalex.org/W2963037989","https://openalex.org/W2964434710","https://openalex.org/W2980856918","https://openalex.org/W2982157693","https://openalex.org/W2988595046","https://openalex.org/W2997768846","https://openalex.org/W3039010666","https://openalex.org/W3047492485","https://openalex.org/W3088405768","https://openalex.org/W3100985894","https://openalex.org/W3107241502","https://openalex.org/W3130823781","https://openalex.org/W3132412959","https://openalex.org/W3176653330","https://openalex.org/W4293107892","https://openalex.org/W4317935380","https://openalex.org/W4366564134","https://openalex.org/W4367046915","https://openalex.org/W4375851947","https://openalex.org/W4380881139","https://openalex.org/W4390189386","https://openalex.org/W4394892775","https://openalex.org/W4400433711","https://openalex.org/W4401751813"],"related_works":[],"abstract_inverted_index":{"Model":[0],"serving":[1,26,58],"systems":[2],"have":[3,20],"become":[4],"popular":[5],"for":[6,11,23,90,109],"deploying":[7],"deep":[8],"learning":[9],"models":[10,137],"various":[12],"latency-sensitive":[13],"inference":[14],"tasks.":[15],"While":[16],"traditional":[17],"replication-based":[18],"methods":[19,31],"been":[21],"used":[22],"failure-resilient":[24,56],"model":[25,57,69],"in":[27,35,155],"the":[28,67,75,110,178],"cloud,":[29],"such":[30],"are":[32],"often":[33],"infeasible":[34],"edge":[36,128,164],"environments":[37],"due":[38],"to":[39,86,101,106,177],"significant":[40],"resource":[41],"constraints":[42],"that":[43,60,82,139],"preclude":[44],"full":[45,116],"replication.":[46],"To":[47],"address":[48],"this":[49,51],"problem,":[50],"paper":[52],"presents":[53],"FailLite,":[54],"a":[55,63,71,115,152],"system":[59,120],"employs":[61],"(i)":[62],"heterogeneous":[64],"replication":[65],"where":[66,161],"failover":[68,89,100],"is":[70],"smaller":[72],"variant":[73],"of":[74,118,163],"original":[76],"one,":[77],"(ii)":[78],"an":[79,126],"intelligent":[80],"approach":[81],"uses":[83],"warm":[84],"replicas":[85],"ensure":[87],"quick":[88],"critical":[91],"applications":[92,145],"while":[93],"using":[94,135],"cold":[95],"replicas,":[96],"and":[97,121,130,150],"(iii)":[98],"progressive":[99],"provide":[102],"low":[103],"mean":[104],"time":[105],"recovery":[107,170],"(MTTR)":[108],"remaining":[111],"applications.":[112],"We":[113],"implement":[114],"prototype":[117],"our":[119],"demonstrate":[122],"its":[123],"efficacy":[124],"on":[125],"experimental":[127],"testbed":[129],"large-scale":[131],"simulations.":[132],"Our":[133],"results":[134],"27":[136],"show":[138],"FailLite":[140,168],"can":[141],"recover":[142],"all":[143],"failed":[144],"with":[146],"2\u00d7":[147],"lower":[148],"MTTR":[149],"only":[151],"0.6%":[153],"reduction":[154],"accuracy.":[156],"Under":[157],"extreme":[158],"failure":[159],"scenarios,":[160],"50%":[162],"sites":[165],"fail":[166],"simultaneously,":[167],"improves":[169],"rate":[171],"by":[172],"at":[173],"least":[174],"39.3%":[175],"compared":[176],"baseline":[179],"methods.":[180]},"counts_by_year":[{"year":2026,"cited_by_count":2}],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2026-01-14T00:00:00"}
