{"id":"https://openalex.org/W7117711130","doi":"https://doi.org/10.48550/arxiv.2512.23310","title":"Splitwise: Collaborative Edge-Cloud Inference for LLMs via Lyapunov-Assisted DRL","display_name":"Splitwise: Collaborative Edge-Cloud Inference for LLMs via Lyapunov-Assisted DRL","publication_year":2025,"publication_date":"2025-12-29","ids":{"openalex":"https://openalex.org/W7117711130","doi":"https://doi.org/10.48550/arxiv.2512.23310"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2512.23310","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2512.23310","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2512.23310","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5081387222","display_name":"Abolfazl Younesi","orcid":"https://orcid.org/0009-0003-0052-6475"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Younesi, Abolfazl","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5117291301","display_name":"Abbas Shabrang Maryan","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Maryan, Abbas Shabrang","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5106737354","display_name":"Elyas Oustad","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Oustad, Elyas","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5011672380","display_name":"Zahra Najafabadi Samani","orcid":"https://orcid.org/0000-0001-5182-9087"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Samani, Zahra Najafabadi","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5035446526","display_name":"Mohsen Ansari","orcid":"https://orcid.org/0000-0002-4670-8608"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Ansari, Mohsen","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5000119692","display_name":"Thomas Fahringer","orcid":"https://orcid.org/0000-0003-4293-1228"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Fahringer, Thomas","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":6,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10273","display_name":"IoT and Edge/Fog Computing","score":0.263700008392334,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10273","display_name":"IoT and Edge/Fog Computing","score":0.263700008392334,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.2232999950647354,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T14347","display_name":"Big Data and Digital Economy","score":0.16339999437332153,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/inference","display_name":"Inference","score":0.6136000156402588},{"id":"https://openalex.org/keywords/latency","display_name":"Latency (audio)","score":0.5051000118255615},{"id":"https://openalex.org/keywords/robustness","display_name":"Robustness (evolution)","score":0.41370001435279846},{"id":"https://openalex.org/keywords/cloud-computing","display_name":"Cloud computing","score":0.4050999879837036},{"id":"https://openalex.org/keywords/energy-consumption","display_name":"Energy consumption","score":0.40389999747276306},{"id":"https://openalex.org/keywords/queue","display_name":"Queue","score":0.40209999680519104},{"id":"https://openalex.org/keywords/fuzzy-inference","display_name":"Fuzzy inference","score":0.3937000036239624},{"id":"https://openalex.org/keywords/partition","display_name":"Partition (number theory)","score":0.3594000041484833},{"id":"https://openalex.org/keywords/network-packet","display_name":"Network packet","score":0.35429999232292175}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6922000050544739},{"id":"https://openalex.org/C2776214188","wikidata":"https://www.wikidata.org/wiki/Q408386","display_name":"Inference","level":2,"score":0.6136000156402588},{"id":"https://openalex.org/C82876162","wikidata":"https://www.wikidata.org/wiki/Q17096504","display_name":"Latency (audio)","level":2,"score":0.5051000118255615},{"id":"https://openalex.org/C63479239","wikidata":"https://www.wikidata.org/wiki/Q7353546","display_name":"Robustness (evolution)","level":3,"score":0.41370001435279846},{"id":"https://openalex.org/C79974875","wikidata":"https://www.wikidata.org/wiki/Q483639","display_name":"Cloud computing","level":2,"score":0.4050999879837036},{"id":"https://openalex.org/C2780165032","wikidata":"https://www.wikidata.org/wiki/Q16869822","display_name":"Energy consumption","level":2,"score":0.40389999747276306},{"id":"https://openalex.org/C160403385","wikidata":"https://www.wikidata.org/wiki/Q220543","display_name":"Queue","level":2,"score":0.40209999680519104},{"id":"https://openalex.org/C2986395286","wikidata":"https://www.wikidata.org/wiki/Q224821","display_name":"Fuzzy inference","level":5,"score":0.3937000036239624},{"id":"https://openalex.org/C120314980","wikidata":"https://www.wikidata.org/wiki/Q180634","display_name":"Distributed computing","level":1,"score":0.38440001010894775},{"id":"https://openalex.org/C42812","wikidata":"https://www.wikidata.org/wiki/Q1082910","display_name":"Partition (number theory)","level":2,"score":0.3594000041484833},{"id":"https://openalex.org/C158379750","wikidata":"https://www.wikidata.org/wiki/Q214111","display_name":"Network packet","level":2,"score":0.35429999232292175},{"id":"https://openalex.org/C31258907","wikidata":"https://www.wikidata.org/wiki/Q1301371","display_name":"Computer network","level":1,"score":0.3490999937057495},{"id":"https://openalex.org/C138236772","wikidata":"https://www.wikidata.org/wiki/Q25098575","display_name":"Edge device","level":3,"score":0.34450000524520874},{"id":"https://openalex.org/C79403827","wikidata":"https://www.wikidata.org/wiki/Q3988","display_name":"Real-time computing","level":1,"score":0.30410000681877136},{"id":"https://openalex.org/C93996380","wikidata":"https://www.wikidata.org/wiki/Q44127","display_name":"Server","level":2,"score":0.3010999858379364},{"id":"https://openalex.org/C2780898871","wikidata":"https://www.wikidata.org/wiki/Q860554","display_name":"Performance metric","level":2,"score":0.29580000042915344},{"id":"https://openalex.org/C98763669","wikidata":"https://www.wikidata.org/wiki/Q176645","display_name":"Markov chain","level":2,"score":0.2879999876022339},{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.2822999954223633},{"id":"https://openalex.org/C66322947","wikidata":"https://www.wikidata.org/wiki/Q11658","display_name":"Transformer","level":3,"score":0.2809000015258789},{"id":"https://openalex.org/C2984118289","wikidata":"https://www.wikidata.org/wiki/Q29954","display_name":"Power consumption","level":3,"score":0.27000001072883606},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.26989999413490295},{"id":"https://openalex.org/C68649174","wikidata":"https://www.wikidata.org/wiki/Q1379116","display_name":"Base station","level":2,"score":0.26669999957084656},{"id":"https://openalex.org/C108583219","wikidata":"https://www.wikidata.org/wiki/Q197536","display_name":"Deep learning","level":2,"score":0.2653999924659729},{"id":"https://openalex.org/C188198153","wikidata":"https://www.wikidata.org/wiki/Q1613840","display_name":"Limiting","level":2,"score":0.2651999890804291},{"id":"https://openalex.org/C2742236","wikidata":"https://www.wikidata.org/wiki/Q924713","display_name":"Efficient energy use","level":2,"score":0.25780001282691956},{"id":"https://openalex.org/C34862557","wikidata":"https://www.wikidata.org/wiki/Q178985","display_name":"Ode","level":2,"score":0.25619998574256897},{"id":"https://openalex.org/C2987376176","wikidata":"https://www.wikidata.org/wiki/Q224821","display_name":"Fuzzy inference system","level":5,"score":0.2515000104904175}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2512.23310","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2512.23310","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2512.23310","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2512.23310","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[{"score":0.9085442423820496,"id":"https://metadata.un.org/sdg/7","display_name":"Affordable and clean energy"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Deploying":[0],"large":[1],"language":[2],"models":[3],"(LLMs)":[4],"on":[5,124],"edge":[6,59],"devices":[7],"is":[8],"challenging":[9],"due":[10],"to":[11,154,168],"their":[12],"limited":[13],"memory":[14,176],"and":[15,27,36,60,70,93,103,130,138,148,174],"power":[16],"resources.":[17],"Cloud-only":[18],"inference":[19],"reduces":[20,143],"device":[21],"burden":[22],"but":[23],"introduces":[24],"high":[25],"latency":[26,145,164],"cost.":[28],"Static":[29],"edge-cloud":[30],"partitions":[31],"optimize":[32],"a":[33,44],"single":[34],"metric":[35],"struggle":[37],"when":[38],"bandwidth":[39],"fluctuates.":[40],"We":[41],"propose":[42],"Splitwise,":[43],"novel":[45],"Lyapunov-assisted":[46],"deep":[47],"reinforcement":[48],"learning":[49],"(DRL)":[50],"framework":[51],"for":[52],"fine-grained,":[53],"adaptive":[54],"partitioning":[55],"of":[56,120],"LLMs":[57],"across":[58],"cloud":[61],"environments.":[62],"Splitwise":[63,107,142],"decomposes":[64],"transformer":[65],"layers":[66],"into":[67],"attention":[68],"heads":[69],"feed-forward":[71],"sub-blocks,":[72],"exposing":[73],"more":[74],"partition":[75,112],"choices":[76],"than":[77],"layer-wise":[78],"schemes.":[79],"A":[80],"hierarchical":[81],"DRL":[82],"policy,":[83],"guided":[84],"by":[85,146,152,165],"Lyapunov":[86],"optimization,":[87],"jointly":[88],"minimizes":[89],"latency,":[90],"energy":[91,150],"consumption,":[92],"accuracy":[94,173],"degradation":[95],"while":[96,171],"guaranteeing":[97],"queue":[98],"stability":[99],"under":[100],"stochastic":[101],"workloads":[102],"variable":[104],"network":[105],"bandwidth.":[106],"also":[108],"guarantees":[109],"robustness":[110],"via":[111],"checkpoints":[113],"with":[114,134,157],"exponential":[115],"backoff":[116],"recovery":[117],"in":[118],"case":[119],"communication":[121],"failures.":[122],"Experiments":[123],"Jetson":[125],"Orin":[126],"NX,":[127],"Galaxy":[128],"S23,":[129],"Raspberry":[131],"Pi":[132],"5":[133],"GPT-2":[135],"(1.5B),":[136],"LLaMA-7B,":[137],"LLaMA-13B":[139],"show":[140],"that":[141],"end-to-end":[144],"1.4x-2.8x":[147],"cuts":[149],"consumption":[151],"up":[153],"41%":[155],"compared":[156],"existing":[158],"partitioners.":[159],"It":[160],"lowers":[161],"the":[162],"95th-percentile":[163],"53-61%":[166],"relative":[167],"cloud-only":[169],"execution,":[170],"maintaining":[172],"modest":[175],"requirements.":[177]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2025-12-31T00:00:00"}
