{"id":"https://openalex.org/W7154717353","doi":"https://doi.org/10.48550/arxiv.2604.15186","title":"Scepsy: Serving Agentic Workflows Using Aggregate LLM Pipelines","display_name":"Scepsy: Serving Agentic Workflows Using Aggregate LLM Pipelines","publication_year":2026,"publication_date":"2026-04-16","ids":{"openalex":"https://openalex.org/W7154717353","doi":"https://doi.org/10.48550/arxiv.2604.15186"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2604.15186","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.15186","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2604.15186","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5002898752","display_name":"Marcel Wagenl\u00e4nder","orcid":"https://orcid.org/0009-0008-0594-4138"},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Wagenl\u00e4nder, Marcel","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5133852731","display_name":"Otto White","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"White, Otto","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5133869169","display_name":"Britannio Jarrett","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Jarrett, Britannio","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5133916420","display_name":"Pedro Silvestre","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Silvestre, Pedro","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5022887839","display_name":"Yanda Tao","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Tao, Yanda","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5133859692","display_name":"Guo Li","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Li, Guo","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5072678098","display_name":"Huanzhou Zhu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhu, Huanzhou","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5024693147","display_name":"Llu\u00eds Vilanova","orcid":"https://orcid.org/0000-0002-1452-840X"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Vilanova, Ll\u00fais","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5126558990","display_name":"Peter Pietzuch","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Pietzuch, Peter","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":9,"corresponding_author_ids":["https://openalex.org/A5002898752"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11986","display_name":"Scientific Computing and Data Management","score":0.181099995970726,"subfield":{"id":"https://openalex.org/subfields/1802","display_name":"Information Systems and Management"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},"topics":[{"id":"https://openalex.org/T11986","display_name":"Scientific Computing and Data Management","score":0.181099995970726,"subfield":{"id":"https://openalex.org/subfields/1802","display_name":"Information Systems and Management"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T10456","display_name":"Multi-Agent Systems and Negotiation","score":0.13099999725818634,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.08370000123977661,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/workflow","display_name":"Workflow","score":0.835099995136261},{"id":"https://openalex.org/keywords/exploit","display_name":"Exploit","score":0.6510000228881836},{"id":"https://openalex.org/keywords/latency","display_name":"Latency (audio)","score":0.5954999923706055},{"id":"https://openalex.org/keywords/pipeline","display_name":"Pipeline (software)","score":0.576200008392334},{"id":"https://openalex.org/keywords/aggregate","display_name":"Aggregate (composite)","score":0.5620999932289124},{"id":"https://openalex.org/keywords/low-latency","display_name":"Low latency (capital markets)","score":0.4016000032424927},{"id":"https://openalex.org/keywords/throughput","display_name":"Throughput","score":0.3901999890804291},{"id":"https://openalex.org/keywords/scheduling","display_name":"Scheduling (production processes)","score":0.38839998841285706}],"concepts":[{"id":"https://openalex.org/C177212765","wikidata":"https://www.wikidata.org/wiki/Q627335","display_name":"Workflow","level":2,"score":0.835099995136261},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8033999800682068},{"id":"https://openalex.org/C165696696","wikidata":"https://www.wikidata.org/wiki/Q11287","display_name":"Exploit","level":2,"score":0.6510000228881836},{"id":"https://openalex.org/C82876162","wikidata":"https://www.wikidata.org/wiki/Q17096504","display_name":"Latency (audio)","level":2,"score":0.5954999923706055},{"id":"https://openalex.org/C43521106","wikidata":"https://www.wikidata.org/wiki/Q2165493","display_name":"Pipeline (software)","level":2,"score":0.576200008392334},{"id":"https://openalex.org/C4679612","wikidata":"https://www.wikidata.org/wiki/Q866298","display_name":"Aggregate (composite)","level":2,"score":0.5620999932289124},{"id":"https://openalex.org/C120314980","wikidata":"https://www.wikidata.org/wiki/Q180634","display_name":"Distributed computing","level":1,"score":0.4277999997138977},{"id":"https://openalex.org/C46637626","wikidata":"https://www.wikidata.org/wiki/Q6693015","display_name":"Low latency (capital markets)","level":2,"score":0.4016000032424927},{"id":"https://openalex.org/C157764524","wikidata":"https://www.wikidata.org/wiki/Q1383412","display_name":"Throughput","level":3,"score":0.3901999890804291},{"id":"https://openalex.org/C206729178","wikidata":"https://www.wikidata.org/wiki/Q2271896","display_name":"Scheduling (production processes)","level":2,"score":0.38839998841285706},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.3732999861240387},{"id":"https://openalex.org/C2775937380","wikidata":"https://www.wikidata.org/wiki/Q1232589","display_name":"Replica","level":2,"score":0.36000001430511475},{"id":"https://openalex.org/C173801870","wikidata":"https://www.wikidata.org/wiki/Q201413","display_name":"Heuristic","level":2,"score":0.33090001344680786},{"id":"https://openalex.org/C2781172179","wikidata":"https://www.wikidata.org/wiki/Q853109","display_name":"Parallelism (grammar)","level":2,"score":0.3301999866962433},{"id":"https://openalex.org/C2989134064","wikidata":"https://www.wikidata.org/wiki/Q288510","display_name":"Execution time","level":2,"score":0.3240000009536743},{"id":"https://openalex.org/C175309249","wikidata":"https://www.wikidata.org/wiki/Q725864","display_name":"Pipeline transport","level":2,"score":0.31310001015663147},{"id":"https://openalex.org/C199845137","wikidata":"https://www.wikidata.org/wiki/Q145490","display_name":"Network topology","level":2,"score":0.3003000020980835},{"id":"https://openalex.org/C2776834041","wikidata":"https://www.wikidata.org/wiki/Q25346349","display_name":"Execution model","level":2,"score":0.3000999987125397},{"id":"https://openalex.org/C107027933","wikidata":"https://www.wikidata.org/wiki/Q2006448","display_name":"Stream processing","level":2,"score":0.2930000126361847},{"id":"https://openalex.org/C2780801425","wikidata":"https://www.wikidata.org/wiki/Q5164392","display_name":"Construct (python library)","level":2,"score":0.2858000099658966},{"id":"https://openalex.org/C2776207758","wikidata":"https://www.wikidata.org/wiki/Q5303302","display_name":"Downstream (manufacturing)","level":2,"score":0.2750000059604645},{"id":"https://openalex.org/C68339613","wikidata":"https://www.wikidata.org/wiki/Q1549489","display_name":"Speedup","level":2,"score":0.25429999828338623}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2604.15186","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.15186","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2604.15186","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.15186","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Agentic":[0],"workflows":[1,17,53,79,91,207],"carry":[2],"out":[3],"complex":[4],"tasks":[5],"by":[6],"orchestrating":[7],"multiple":[8],"large":[9],"language":[10],"models":[11],"(LLMs)":[12],"and":[13,36,178,217],"tools.":[14],"Serving":[15],"such":[16],"at":[18],"a":[19,68,81,141,149,157,168,183],"target":[20,158],"throughput":[21,216],"with":[22],"low":[23],"latency":[24,154,220],"is":[25,140],"challenging":[26],"because":[27],"they":[28],"can":[29],"be":[30],"defined":[31],"using":[32],"arbitrary":[33,76],"agentic":[34,70,78,90,206],"frameworks":[35],"exhibit":[37],"unpredictable":[38,93],"execution":[39,41,59,102],"times:":[40],"may":[42],"branch,":[43],"fan-out,":[44],"or":[45,228],"recur":[46],"in":[47,52],"data-dependent":[48],"ways.":[49],"Since":[50],"LLMs":[51,123,226],"often":[54],"outnumber":[55],"available":[56],"GPUs,":[57],"their":[58],"also":[60],"leads":[61],"to":[62,133,166,186,213,222],"GPU":[63,82,112,150,173,193],"oversubscription.":[64],"We":[65],"describe":[66],"Scepsy,":[67],"new":[69],"serving":[71],"system":[72],"that":[73,152,209,224],"efficiently":[74],"schedules":[75],"multi-LLM":[77],"onto":[80,191],"cluster.":[83],"Scepsy":[84,109,160,210],"exploits":[85],"the":[86,96,122,162,188,192],"insight":[87],"that,":[88],"while":[89,155,197],"have":[92],"end-to-end":[94],"latencies,":[95],"shares":[97],"of":[98],"each":[99],"LLM's":[100],"total":[101],"times":[103],"are":[104],"comparatively":[105],"stable":[106],"across":[107],"executions.":[108],"decides":[110],"on":[111,115,204,230],"allocations":[113],"based":[114],"these":[116,131],"aggregate":[117],"shares:":[118],"first,":[119],"it":[120],"profiles":[121],"under":[124],"different":[125],"parallelism":[126,176],"degrees.":[127],"It":[128,181],"then":[129],"uses":[130,161,182],"statistics":[132],"construct":[134],"an":[135],"Aggregate":[136,163],"LLM":[137,164],"Pipeline,":[138],"which":[139],"lightweight":[142],"latency/throughput":[143],"predictor":[144],"for":[145],"allocations.":[146,232],"To":[147],"find":[148],"allocation":[151,190],"minimizes":[153],"achieving":[156],"throughput,":[159],"Pipeline":[165],"explore":[167],"search":[169],"space":[170],"over":[171],"fractional":[172],"shares,":[174],"tensor":[175],"degrees,":[177],"replica":[179],"counts.":[180],"hierarchical":[184],"heuristic":[185],"place":[187],"best":[189],"cluster,":[194],"minimizing":[195],"fragmentation,":[196],"respecting":[198],"network":[199],"topology":[200],"constraints.":[201],"Our":[202],"evaluation":[203],"realistic":[205],"shows":[208],"achieves":[211],"up":[212],"2.4x":[214],"higher":[215],"27x":[218],"lower":[219],"compared":[221],"systems":[223],"optimize":[225],"independently":[227],"rely":[229],"user-specified":[231]},"counts_by_year":[],"updated_date":"2026-04-18T06:05:20.339008","created_date":"2026-04-18T00:00:00"}
