{"id":"https://openalex.org/W7117649764","doi":"https://doi.org/10.1145/3773274.3774267","title":"Splitwise: Collaborative Edge\u2013Cloud Inference for LLMs via Lyapunov-Assisted DRL","display_name":"Splitwise: Collaborative Edge\u2013Cloud Inference for LLMs via Lyapunov-Assisted DRL","publication_year":2025,"publication_date":"2025-12-01","ids":{"openalex":"https://openalex.org/W7117649764","doi":"https://doi.org/10.1145/3773274.3774267"},"language":null,"primary_location":{"id":"doi:10.1145/3773274.3774267","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3773274.3774267","pdf_url":null,"source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 18th IEEE/ACM International Conference on Utility and Cloud Computing","raw_type":"proceedings-article"},"type":"article","indexed_in":["arxiv","crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://doi.org/10.1145/3773274.3774267","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5081387222","display_name":"Abolfazl Younesi","orcid":"https://orcid.org/0009-0003-0052-6475"},"institutions":[{"id":"https://openalex.org/I190249584","display_name":"Universit\u00e4t Innsbruck","ror":"https://ror.org/054pv6659","country_code":"AT","type":"education","lineage":["https://openalex.org/I190249584"]},{"id":"https://openalex.org/I4210121717","display_name":"Tirol Kliniken","ror":"https://ror.org/028ze1052","country_code":"AT","type":"healthcare","lineage":["https://openalex.org/I4210121717"]}],"countries":["AT"],"is_corresponding":false,"raw_author_name":"Abolfazl Younesi","raw_affiliation_strings":["Departement Computer Science, University of Innsbruck, Innsbruck, Tirol, Austria"],"raw_orcid":"https://orcid.org/0009-0003-0052-6475","affiliations":[{"raw_affiliation_string":"Departement Computer Science, University of Innsbruck, Innsbruck, Tirol, Austria","institution_ids":["https://openalex.org/I4210121717","https://openalex.org/I190249584"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5117291301","display_name":"Abbas Shabrang Maryan","orcid":null},"institutions":[{"id":"https://openalex.org/I133529467","display_name":"Sharif University of Technology","ror":"https://ror.org/024c2fq17","country_code":"IR","type":"education","lineage":["https://openalex.org/I133529467"]}],"countries":["IR"],"is_corresponding":false,"raw_author_name":"Abbas Shabrang Maryan","raw_affiliation_strings":["Sharif University of Technology, Tehran, Iran"],"raw_orcid":"https://orcid.org/0009-0003-0735-2455","affiliations":[{"raw_affiliation_string":"Sharif University of Technology, Tehran, Iran","institution_ids":["https://openalex.org/I133529467"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5106737354","display_name":"Elyas Oustad","orcid":null},"institutions":[{"id":"https://openalex.org/I133529467","display_name":"Sharif University of Technology","ror":"https://ror.org/024c2fq17","country_code":"IR","type":"education","lineage":["https://openalex.org/I133529467"]}],"countries":["IR"],"is_corresponding":false,"raw_author_name":"Elyas Oustad","raw_affiliation_strings":["Sharif University of Technology, Tehran, Iran"],"raw_orcid":"https://orcid.org/0009-0006-1456-356X","affiliations":[{"raw_affiliation_string":"Sharif University of Technology, Tehran, Iran","institution_ids":["https://openalex.org/I133529467"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5011672380","display_name":"Zahra Najafabadi Samani","orcid":"https://orcid.org/0000-0001-5182-9087"},"institutions":[{"id":"https://openalex.org/I190249584","display_name":"Universit\u00e4t Innsbruck","ror":"https://ror.org/054pv6659","country_code":"AT","type":"education","lineage":["https://openalex.org/I190249584"]}],"countries":["AT"],"is_corresponding":false,"raw_author_name":"Zahra Najafabadi Samani","raw_affiliation_strings":["University of Innsbruck, Innsbruck, Austria"],"raw_orcid":"https://orcid.org/0000-0001-5182-9087","affiliations":[{"raw_affiliation_string":"University of Innsbruck, Innsbruck, Austria","institution_ids":["https://openalex.org/I190249584"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5035446526","display_name":"Mohsen Ansari","orcid":"https://orcid.org/0000-0002-4670-8608"},"institutions":[{"id":"https://openalex.org/I133529467","display_name":"Sharif University of Technology","ror":"https://ror.org/024c2fq17","country_code":"IR","type":"education","lineage":["https://openalex.org/I133529467"]}],"countries":["IR"],"is_corresponding":false,"raw_author_name":"Mohsen Ansari","raw_affiliation_strings":["Sharif University of Technology, Tehran, Iran"],"raw_orcid":"https://orcid.org/0000-0002-4670-8608","affiliations":[{"raw_affiliation_string":"Sharif University of Technology, Tehran, Iran","institution_ids":["https://openalex.org/I133529467"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5000119692","display_name":"Thomas Fahringer","orcid":"https://orcid.org/0000-0003-4293-1228"},"institutions":[{"id":"https://openalex.org/I190249584","display_name":"Universit\u00e4t Innsbruck","ror":"https://ror.org/054pv6659","country_code":"AT","type":"education","lineage":["https://openalex.org/I190249584"]}],"countries":["AT"],"is_corresponding":false,"raw_author_name":"Thomas Fahringer","raw_affiliation_strings":["University of Innsbruck, Austria, Innsbruck, Austria"],"raw_orcid":"https://orcid.org/0000-0003-4293-1228","affiliations":[{"raw_affiliation_string":"University of Innsbruck, Austria, Innsbruck, Austria","institution_ids":["https://openalex.org/I190249584"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":6,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.9349,"has_fulltext":true,"cited_by_count":1,"citation_normalized_percentile":{"value":0.82703946,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":95,"max":98},"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"11"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.2581999897956848,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.2581999897956848,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10273","display_name":"IoT and Edge/Fog Computing","score":0.21660000085830688,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T14347","display_name":"Big Data and Digital Economy","score":0.15690000355243683,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/inference","display_name":"Inference","score":0.604200005531311},{"id":"https://openalex.org/keywords/latency","display_name":"Latency (audio)","score":0.508400022983551},{"id":"https://openalex.org/keywords/queue","display_name":"Queue","score":0.4180000126361847},{"id":"https://openalex.org/keywords/robustness","display_name":"Robustness (evolution)","score":0.40709999203681946},{"id":"https://openalex.org/keywords/energy-consumption","display_name":"Energy consumption","score":0.4059000015258789},{"id":"https://openalex.org/keywords/cloud-computing","display_name":"Cloud computing","score":0.396699994802475},{"id":"https://openalex.org/keywords/fuzzy-inference","display_name":"Fuzzy inference","score":0.38940000534057617},{"id":"https://openalex.org/keywords/partition","display_name":"Partition (number theory)","score":0.35839998722076416},{"id":"https://openalex.org/keywords/network-packet","display_name":"Network packet","score":0.34459999203681946}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6847000122070312},{"id":"https://openalex.org/C2776214188","wikidata":"https://www.wikidata.org/wiki/Q408386","display_name":"Inference","level":2,"score":0.604200005531311},{"id":"https://openalex.org/C82876162","wikidata":"https://www.wikidata.org/wiki/Q17096504","display_name":"Latency (audio)","level":2,"score":0.508400022983551},{"id":"https://openalex.org/C160403385","wikidata":"https://www.wikidata.org/wiki/Q220543","display_name":"Queue","level":2,"score":0.4180000126361847},{"id":"https://openalex.org/C63479239","wikidata":"https://www.wikidata.org/wiki/Q7353546","display_name":"Robustness (evolution)","level":3,"score":0.40709999203681946},{"id":"https://openalex.org/C2780165032","wikidata":"https://www.wikidata.org/wiki/Q16869822","display_name":"Energy consumption","level":2,"score":0.4059000015258789},{"id":"https://openalex.org/C79974875","wikidata":"https://www.wikidata.org/wiki/Q483639","display_name":"Cloud computing","level":2,"score":0.396699994802475},{"id":"https://openalex.org/C120314980","wikidata":"https://www.wikidata.org/wiki/Q180634","display_name":"Distributed computing","level":1,"score":0.39489999413490295},{"id":"https://openalex.org/C2986395286","wikidata":"https://www.wikidata.org/wiki/Q224821","display_name":"Fuzzy inference","level":5,"score":0.38940000534057617},{"id":"https://openalex.org/C42812","wikidata":"https://www.wikidata.org/wiki/Q1082910","display_name":"Partition (number theory)","level":2,"score":0.35839998722076416},{"id":"https://openalex.org/C158379750","wikidata":"https://www.wikidata.org/wiki/Q214111","display_name":"Network packet","level":2,"score":0.34459999203681946},{"id":"https://openalex.org/C138236772","wikidata":"https://www.wikidata.org/wiki/Q25098575","display_name":"Edge device","level":3,"score":0.34130001068115234},{"id":"https://openalex.org/C31258907","wikidata":"https://www.wikidata.org/wiki/Q1301371","display_name":"Computer network","level":1,"score":0.33629998564720154},{"id":"https://openalex.org/C79403827","wikidata":"https://www.wikidata.org/wiki/Q3988","display_name":"Real-time computing","level":1,"score":0.311599999666214},{"id":"https://openalex.org/C93996380","wikidata":"https://www.wikidata.org/wiki/Q44127","display_name":"Server","level":2,"score":0.30489999055862427},{"id":"https://openalex.org/C2780898871","wikidata":"https://www.wikidata.org/wiki/Q860554","display_name":"Performance metric","level":2,"score":0.30480000376701355},{"id":"https://openalex.org/C34862557","wikidata":"https://www.wikidata.org/wiki/Q178985","display_name":"Ode","level":2,"score":0.2955999970436096},{"id":"https://openalex.org/C98763669","wikidata":"https://www.wikidata.org/wiki/Q176645","display_name":"Markov chain","level":2,"score":0.2946999967098236},{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.27889999747276306},{"id":"https://openalex.org/C66322947","wikidata":"https://www.wikidata.org/wiki/Q11658","display_name":"Transformer","level":3,"score":0.2777000069618225},{"id":"https://openalex.org/C108583219","wikidata":"https://www.wikidata.org/wiki/Q197536","display_name":"Deep learning","level":2,"score":0.26829999685287476},{"id":"https://openalex.org/C2984118289","wikidata":"https://www.wikidata.org/wiki/Q29954","display_name":"Power consumption","level":3,"score":0.2678999900817871},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.26669999957084656},{"id":"https://openalex.org/C68649174","wikidata":"https://www.wikidata.org/wiki/Q1379116","display_name":"Base station","level":2,"score":0.26489999890327454},{"id":"https://openalex.org/C2987376176","wikidata":"https://www.wikidata.org/wiki/Q224821","display_name":"Fuzzy inference system","level":5,"score":0.26260000467300415},{"id":"https://openalex.org/C2742236","wikidata":"https://www.wikidata.org/wiki/Q924713","display_name":"Efficient energy use","level":2,"score":0.2526000142097473},{"id":"https://openalex.org/C188198153","wikidata":"https://www.wikidata.org/wiki/Q1613840","display_name":"Limiting","level":2,"score":0.2526000142097473}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1145/3773274.3774267","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3773274.3774267","pdf_url":null,"source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 18th IEEE/ACM International Conference on Utility and Cloud Computing","raw_type":"proceedings-article"},{"id":"pmh:oai:arXiv.org:2512.23310","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2512.23310","pdf_url":"https://arxiv.org/pdf/2512.23310","source":{"id":"https://openalex.org/S4393918464","display_name":"ArXiv.org","issn_l":"2331-8422","issn":["2331-8422"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"}],"best_oa_location":{"id":"doi:10.1145/3773274.3774267","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3773274.3774267","pdf_url":null,"source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 18th IEEE/ACM International Conference on Utility and Cloud Computing","raw_type":"proceedings-article"},"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/7","score":0.9083346128463745,"display_name":"Affordable and clean energy"}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":19,"referenced_works":["https://openalex.org/W1509197282","https://openalex.org/W2144134437","https://openalex.org/W2605258629","https://openalex.org/W2889740153","https://openalex.org/W2920031528","https://openalex.org/W2968104784","https://openalex.org/W3049640275","https://openalex.org/W4282970339","https://openalex.org/W4313496563","https://openalex.org/W4400728225","https://openalex.org/W4405756215","https://openalex.org/W4405934565","https://openalex.org/W4409248688","https://openalex.org/W4409796734","https://openalex.org/W4410226897","https://openalex.org/W4412412131","https://openalex.org/W4413640445","https://openalex.org/W7084026035","https://openalex.org/W7084074251"],"related_works":[],"abstract_inverted_index":{"Deploying":[0],"large":[1],"language":[2],"models":[3],"(LLMs)":[4],"on":[5,125],"edge":[6,59],"devices":[7],"is":[8],"challenging":[9],"due":[10],"to":[11,158,172],"their":[12],"limited":[13],"memory":[14,180],"and":[15,27,36,60,70,94,104,131,139,152,178],"power":[16],"resources.":[17],"Cloud-only":[18],"inference":[19],"reduces":[20,144],"device":[21],"burden":[22],"but":[23],"introduces":[24],"high":[25],"latency":[26,146,168],"cost.":[28],"Static":[29],"edge\u2013cloud":[30],"partitions":[31],"optimize":[32],"a":[33,44],"single":[34],"metric":[35],"struggle":[37],"when":[38],"bandwidth":[39],"fluctuates.":[40],"We":[41],"propose":[42],"Splitwise,":[43],"novel":[45],"Lyapunov-assisted":[46],"deep":[47],"reinforcement":[48],"learning":[49],"(DRL)":[50],"framework":[51],"for":[52],"fine-grained,":[53],"adaptive":[54],"partitioning":[55],"of":[56,121],"LLMs":[57],"across":[58],"cloud":[61],"environments.":[62],"Splitwise":[63,108,143],"decomposes":[64],"transformer":[65],"layers":[66],"into":[67],"attention":[68],"heads":[69],"feed-forward":[71],"sub-blocks,":[72],"exposing":[73],"exponentially":[74],"more":[75],"partition":[76,113],"choices":[77],"than":[78],"layer-wise":[79],"schemes.":[80],"A":[81],"hierarchical":[82],"DRL":[83],"policy,":[84],"guided":[85],"by":[86,147,156,169],"Lyapunov":[87],"optimization,":[88],"jointly":[89],"minimizes":[90],"latency,":[91],"energy":[92,154],"consumption,":[93],"accuracy":[95,177],"degradation":[96],"while":[97,175],"guaranteeing":[98],"queue":[99],"stability":[100],"under":[101],"stochastic":[102],"workloads":[103],"variable":[105],"network":[106],"bandwidth.":[107],"also":[109],"guarantees":[110],"robustness":[111],"via":[112],"checkpoints":[114],"with":[115,135,161],"exponential":[116],"backoff":[117],"recovery":[118],"in":[119],"case":[120],"communication":[122],"failures.":[123],"Experiments":[124],"Jetson":[126],"Orin":[127],"NX,":[128],"Galaxy":[129],"S23,":[130],"Raspberry":[132],"Pi":[133],"5":[134],"GPT\u20112":[136],"(1.5B),":[137],"LLaMA\u20117B,":[138],"LLaMA\u201113B":[140],"show":[141],"that":[142],"end\u2011to\u2011end":[145],"1.4":[148],"\u00d7":[149,151],"\u20132.8":[150],"cuts":[153],"consumption":[155],"up":[157],"41%":[159],"compared":[160],"existing":[162],"partitioners.":[163],"It":[164],"lowers":[165],"the":[166],"95th-percentile":[167],"53\u201361%":[170],"relative":[171],"cloud-only":[173],"execution,":[174],"maintaining":[176],"modest":[179],"requirements.":[181]},"counts_by_year":[{"year":2026,"cited_by_count":1}],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2025-12-31T00:00:00"}
