{"id":"https://openalex.org/W7128779120","doi":"https://doi.org/10.48550/arxiv.2602.12151","title":"OServe: Accelerating LLM Serving via Spatial-Temporal Workload Orchestration","display_name":"OServe: Accelerating LLM Serving via Spatial-Temporal Workload Orchestration","publication_year":2026,"publication_date":"2026-02-12","ids":{"openalex":"https://openalex.org/W7128779120","doi":"https://doi.org/10.48550/arxiv.2602.12151"},"language":null,"primary_location":{"id":"pmh:doi:10.48550/arxiv.2602.12151","is_oa":true,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S4406922384","display_name":"Open MIND","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"publisher-specific-oa","license_id":"https://openalex.org/licenses/publisher-specific-oa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":null,"any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5070725905","display_name":"Youhe Jiang","orcid":"https://orcid.org/0009-0000-9297-642X"},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Jiang, Youhe","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5125963862","display_name":"Fangcheng Fu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Fu, Fangcheng","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5070993300","display_name":"Taiyi Wang","orcid":"https://orcid.org/0009-0004-2255-5944"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wang, Taiyi","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5125946371","display_name":"Guoliang He","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"He, Guoliang","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5125693000","display_name":"Eiko Yoneki","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yoneki, Eiko","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5070725905"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12127","display_name":"Software System Performance and Reliability","score":0.2290000021457672,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T12127","display_name":"Software System Performance and Reliability","score":0.2290000021457672,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T14347","display_name":"Big Data and Digital Economy","score":0.14010000228881836,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10101","display_name":"Cloud Computing and Resource Management","score":0.10320000350475311,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/workload","display_name":"Workload","score":0.8810999989509583},{"id":"https://openalex.org/keywords/software-deployment","display_name":"Software deployment","score":0.6689000129699707},{"id":"https://openalex.org/keywords/scheduling","display_name":"Scheduling (production processes)","score":0.6323000192642212},{"id":"https://openalex.org/keywords/orchestration","display_name":"Orchestration","score":0.5557000041007996},{"id":"https://openalex.org/keywords/response-time","display_name":"Response time","score":0.3075000047683716}],"concepts":[{"id":"https://openalex.org/C2778476105","wikidata":"https://www.wikidata.org/wiki/Q628539","display_name":"Workload","level":2,"score":0.8810999989509583},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8571000099182129},{"id":"https://openalex.org/C105339364","wikidata":"https://www.wikidata.org/wiki/Q2297740","display_name":"Software deployment","level":2,"score":0.6689000129699707},{"id":"https://openalex.org/C206729178","wikidata":"https://www.wikidata.org/wiki/Q2271896","display_name":"Scheduling (production processes)","level":2,"score":0.6323000192642212},{"id":"https://openalex.org/C120314980","wikidata":"https://www.wikidata.org/wiki/Q180634","display_name":"Distributed computing","level":1,"score":0.6308000087738037},{"id":"https://openalex.org/C199168358","wikidata":"https://www.wikidata.org/wiki/Q3367000","display_name":"Orchestration","level":3,"score":0.5557000041007996},{"id":"https://openalex.org/C79403827","wikidata":"https://www.wikidata.org/wiki/Q3988","display_name":"Real-time computing","level":1,"score":0.45809999108314514},{"id":"https://openalex.org/C19012869","wikidata":"https://www.wikidata.org/wiki/Q578372","display_name":"Response time","level":2,"score":0.3075000047683716},{"id":"https://openalex.org/C2989134064","wikidata":"https://www.wikidata.org/wiki/Q288510","display_name":"Execution time","level":2,"score":0.2996000051498413},{"id":"https://openalex.org/C31258907","wikidata":"https://www.wikidata.org/wiki/Q1301371","display_name":"Computer network","level":1,"score":0.28209999203681946},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.26440000534057617}],"mesh":[],"locations_count":2,"locations":[{"id":"pmh:doi:10.48550/arxiv.2602.12151","is_oa":true,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S4406922384","display_name":"Open MIND","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"publisher-specific-oa","license_id":"https://openalex.org/licenses/publisher-specific-oa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Article"},{"id":"doi:10.48550/arxiv.2602.12151","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2602.12151","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"pmh:doi:10.48550/arxiv.2602.12151","is_oa":true,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S4406922384","display_name":"Open MIND","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"publisher-specific-oa","license_id":"https://openalex.org/licenses/publisher-specific-oa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Serving":[0],"Large":[1],"Language":[2],"Models":[3],"(LLMs)":[4],"can":[5],"benefit":[6],"immensely":[7],"from":[8],"parallelizing":[9],"both":[10,90],"the":[11,65],"model":[12,60,86,106,123],"and":[13,25,36,52,67,84,92],"input":[14],"requests":[15,32],"across":[16],"multiple":[17],"devices,":[18],"but":[19],"incoming":[20],"workloads":[21,29],"exhibit":[22],"substantial":[23],"spatial":[24,91],"temporal":[26,93],"heterogeneity.":[27,94],"Spatially,":[28],"comprise":[30],"heterogeneous":[31,83,105],"with":[33,82],"varying":[34],"compute":[35],"memory":[37],"demands.":[38],"Temporally,":[39],"workload":[40,111,129],"composition":[41],"varies":[42],"over":[43],"time.":[44],"Nevertheless,":[45],"existing":[46],"systems":[47],"typically":[48],"assume":[49],"spatially":[50],"uniform":[51],"temporally":[53],"stable":[54],"workloads,":[55],"employing":[56],"a":[57,98],"homogeneous,":[58],"static":[59],"deployment.":[61],"This":[62],"mismatch":[63],"between":[64],"assumption":[66],"real-world":[68,133],"spatial-temporal":[69],"heterogeneity":[70],"results":[71],"in":[72,125],"suboptimal":[73],"performance.":[74],"We":[75],"present":[76],"OServe,":[77],"an":[78,116],"LLM":[79],"serving":[80,149],"system":[81],"flexible":[85],"deployment":[87],"that":[88,103,121,136],"addresses":[89],"First,":[95],"OServe":[96,114,137],"introduces":[97],"novel":[99],"workload-aware":[100],"scheduling":[101],"algorithm":[102],"optimizes":[104],"deployments":[107,124],"according":[108],"to":[109,127,142,147],"real-time":[110],"characteristics.":[112],"Second,":[113],"proposes":[115],"efficient":[117],"workload-adaptive":[118],"switching":[119],"method":[120],"migrates":[122],"response":[126],"predicted":[128],"changes.":[130],"Experiments":[131],"on":[132],"traces":[134],"show":[135],"improves":[138],"performance":[139],"by":[140],"up":[141],"2$\\times$":[143],"(average:":[144],"1.5$\\times$)":[145],"compared":[146],"state-of-the-art":[148],"systems.":[150]},"counts_by_year":[],"updated_date":"2026-04-04T16:13:02.066488","created_date":"2026-02-14T00:00:00"}
