{"id":"https://openalex.org/W7148297671","doi":"https://doi.org/10.48550/arxiv.2604.00136","title":"ParetoBandit: Budget-Paced Adaptive Routing for Non-Stationary LLM Serving","display_name":"ParetoBandit: Budget-Paced Adaptive Routing for Non-Stationary LLM Serving","publication_year":2026,"publication_date":"2026-03-31","ids":{"openalex":"https://openalex.org/W7148297671","doi":"https://doi.org/10.48550/arxiv.2604.00136"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2604.00136","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.00136","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2604.00136","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5132797425","display_name":"Annette Taberner-Miller","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Taberner-Miller, Annette","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":1,"corresponding_author_ids":["https://openalex.org/A5132797425"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10714","display_name":"Software-Defined Networks and 5G","score":0.4749000072479248,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10714","display_name":"Software-Defined Networks and 5G","score":0.4749000072479248,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10138","display_name":"Network Traffic and Congestion Control","score":0.11580000072717667,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10101","display_name":"Cloud Computing and Resource Management","score":0.07440000027418137,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/router","display_name":"Router","score":0.6322000026702881},{"id":"https://openalex.org/keywords/latency","display_name":"Latency (audio)","score":0.532800018787384},{"id":"https://openalex.org/keywords/routing","display_name":"Routing (electronic design automation)","score":0.5245000123977661},{"id":"https://openalex.org/keywords/software-deployment","display_name":"Software deployment","score":0.42010000348091125},{"id":"https://openalex.org/keywords/quality","display_name":"Quality (philosophy)","score":0.41130000352859497},{"id":"https://openalex.org/keywords/quality-of-service","display_name":"Quality of service","score":0.3626999855041504},{"id":"https://openalex.org/keywords/budget-constraint","display_name":"Budget constraint","score":0.3142000138759613}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7073000073432922},{"id":"https://openalex.org/C2775896111","wikidata":"https://www.wikidata.org/wiki/Q642560","display_name":"Router","level":2,"score":0.6322000026702881},{"id":"https://openalex.org/C82876162","wikidata":"https://www.wikidata.org/wiki/Q17096504","display_name":"Latency (audio)","level":2,"score":0.532800018787384},{"id":"https://openalex.org/C74172769","wikidata":"https://www.wikidata.org/wiki/Q1446839","display_name":"Routing (electronic design automation)","level":2,"score":0.5245000123977661},{"id":"https://openalex.org/C105339364","wikidata":"https://www.wikidata.org/wiki/Q2297740","display_name":"Software deployment","level":2,"score":0.42010000348091125},{"id":"https://openalex.org/C2779530757","wikidata":"https://www.wikidata.org/wiki/Q1207505","display_name":"Quality (philosophy)","level":2,"score":0.41130000352859497},{"id":"https://openalex.org/C79403827","wikidata":"https://www.wikidata.org/wiki/Q3988","display_name":"Real-time computing","level":1,"score":0.3971000015735626},{"id":"https://openalex.org/C31258907","wikidata":"https://www.wikidata.org/wiki/Q1301371","display_name":"Computer network","level":1,"score":0.38429999351501465},{"id":"https://openalex.org/C5119721","wikidata":"https://www.wikidata.org/wiki/Q220501","display_name":"Quality of service","level":2,"score":0.3626999855041504},{"id":"https://openalex.org/C8505890","wikidata":"https://www.wikidata.org/wiki/Q605095","display_name":"Budget constraint","level":2,"score":0.3142000138759613},{"id":"https://openalex.org/C120314980","wikidata":"https://www.wikidata.org/wiki/Q180634","display_name":"Distributed computing","level":1,"score":0.30790001153945923},{"id":"https://openalex.org/C180591934","wikidata":"https://www.wikidata.org/wiki/Q1253369","display_name":"Downtime","level":2,"score":0.3003999888896942},{"id":"https://openalex.org/C43214815","wikidata":"https://www.wikidata.org/wiki/Q7310987","display_name":"Reliability (semiconductor)","level":3,"score":0.29269999265670776},{"id":"https://openalex.org/C81917197","wikidata":"https://www.wikidata.org/wiki/Q628760","display_name":"Selection (genetic algorithm)","level":2,"score":0.2809999883174896},{"id":"https://openalex.org/C42475967","wikidata":"https://www.wikidata.org/wiki/Q194292","display_name":"Operations research","level":1,"score":0.2662999927997589},{"id":"https://openalex.org/C93959086","wikidata":"https://www.wikidata.org/wiki/Q6888345","display_name":"Model selection","level":2,"score":0.26460000872612},{"id":"https://openalex.org/C104954878","wikidata":"https://www.wikidata.org/wiki/Q1648707","display_name":"Routing protocol","level":3,"score":0.2619999945163727},{"id":"https://openalex.org/C116537","wikidata":"https://www.wikidata.org/wiki/Q2169973","display_name":"Service provider","level":3,"score":0.26159998774528503},{"id":"https://openalex.org/C207609745","wikidata":"https://www.wikidata.org/wiki/Q4944086","display_name":"Bootstrapping (finance)","level":2,"score":0.25999999046325684},{"id":"https://openalex.org/C139807058","wikidata":"https://www.wikidata.org/wiki/Q352374","display_name":"Adaptation (eye)","level":2,"score":0.2508000135421753}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2604.00136","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.00136","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2604.00136","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.00136","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[{"display_name":"Peace, Justice and strong institutions","score":0.6239930391311646,"id":"https://metadata.un.org/sdg/16"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Multi-model":[0],"LLM":[1],"serving":[2],"operates":[3],"in":[4,53,72,89],"a":[5,27,44,68,122,127,166,190],"non-stationary,":[6],"noisy":[7],"environment:":[8],"providers":[9],"revise":[10],"pricing,":[11],"model":[12,92,151,156,192],"quality":[13,145,180,186],"can":[14],"shift":[15],"or":[16,91],"regress":[17],"without":[18,126],"notice,":[19],"and":[20,80,130,146,179,188],"new":[21],"models":[22],"arrive":[23],"regularly.":[24],"More":[25],"than":[26],"dozen":[28],"recent":[29],"methods":[30],"have":[31],"proposed":[32],"learned":[33],"routers":[34],"to":[35,86,177,184],"navigate":[36],"the":[37,54,139,169],"resulting":[38],"quality--cost":[39],"tradeoff":[40],"across":[41],"portfolios":[42],"spanning":[43],"$\\sim$530$\\times$":[45],"cost":[46,70,124,147],"range.":[47],"Despite":[48],"this":[49],"activity,":[50],"two":[51],"gaps":[52],"current":[55],"solution":[56],"space":[57],"limit":[58],"routing":[59],"effectiveness":[60],"under":[61],"these":[62],"conditions:":[63],"no":[64],"existing":[65],"router":[66,100,170],"enforces":[67,121],"dollar-denominated":[69],"ceiling":[71,125],"closed":[73],"loop":[74],"over":[75],"an":[76,97,115],"open-ended":[77],"request":[78],"stream,":[79],"none":[81],"provides":[82],"principled":[83],"online":[84,116],"adaptation":[85],"post-deployment":[87],"shifts":[88,181],"pricing":[90],"quality.":[93],"We":[94],"present":[95],"ParetoBandit,":[96],"open-source":[98],"adaptive":[99],"built":[101],"on":[102,134],"cost-aware":[103],"contextual":[104],"bandits":[105],"that":[106,120,137],"addresses":[107],"both":[108],"gaps.":[109],"Its":[110],"core":[111],"contributions":[112],"are:":[113],"(1)":[114],"primal--dual":[117],"budget":[118,172],"pacer":[119],"per-request":[123],"known":[128],"horizon,":[129],"(2)":[131],"geometric":[132],"forgetting":[133],"sufficient":[135],"statistics":[136],"gives":[138],"bandit":[140],"bounded":[141],"memory":[142],"for":[143],"tracking":[144],"shifts.":[148],"A":[149],"hot-swap":[150],"registry":[152],"further":[153],"supports":[154],"runtime":[155],"changes":[157],"with":[158,165,182],"budget-controlled":[159],"exploration.":[160],"On":[161],"1,824":[162],"benchmark":[163],"prompts":[164],"three-model":[167],"portfolio,":[168],"maintains":[171],"compliance":[173],"within":[174,193],"0.4%,":[175],"adapts":[176],"price":[178],"up":[183],"+0.071":[185],"lift,":[187],"integrates":[189],"cold-started":[191],"$\\sim$142":[194],"steps.":[195]},"counts_by_year":[],"updated_date":"2026-04-16T06:03:46.269776","created_date":"2026-04-03T00:00:00"}
