{"id":"https://openalex.org/W7117870265","doi":"https://doi.org/10.1145/3787470.3787480","title":"OmniRouter: Budget and Performance Controllable Multi-LLM Routing","display_name":"OmniRouter: Budget and Performance Controllable Multi-LLM Routing","publication_year":2025,"publication_date":"2025-12-30","ids":{"openalex":"https://openalex.org/W7117870265","doi":"https://doi.org/10.1145/3787470.3787480"},"language":"en","primary_location":{"id":"doi:10.1145/3787470.3787480","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3787470.3787480","pdf_url":null,"source":{"id":"https://openalex.org/S4210176598","display_name":"ACM SIGKDD Explorations Newsletter","issn_l":"1931-0145","issn":["1931-0145","1931-0153"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM SIGKDD Explorations Newsletter","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5100929110","display_name":"Kai Mei","orcid":null},"institutions":[{"id":"https://openalex.org/I4210096112","display_name":"Rutgers Sexual and Reproductive Health and Rights","ror":"https://ror.org/00rcvgx40","country_code":"NL","type":"other","lineage":["https://openalex.org/I4210096112"]}],"countries":["NL"],"is_corresponding":true,"raw_author_name":"Kai Mei","raw_affiliation_strings":["Department of Computer Science, Rutgers University"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Department of Computer Science, Rutgers University","institution_ids":["https://openalex.org/I4210096112"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5081309714","display_name":"Wujiang Xu","orcid":"https://orcid.org/0000-0002-3500-1068"},"institutions":[{"id":"https://openalex.org/I4210096112","display_name":"Rutgers Sexual and Reproductive Health and Rights","ror":"https://ror.org/00rcvgx40","country_code":"NL","type":"other","lineage":["https://openalex.org/I4210096112"]}],"countries":["NL"],"is_corresponding":false,"raw_author_name":"Wujiang Xu","raw_affiliation_strings":["Department of Computer Science, Rutgers University"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Department of Computer Science, Rutgers University","institution_ids":["https://openalex.org/I4210096112"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5121719133","display_name":"Minghao Guo","orcid":null},"institutions":[{"id":"https://openalex.org/I4210096112","display_name":"Rutgers Sexual and Reproductive Health and Rights","ror":"https://ror.org/00rcvgx40","country_code":"NL","type":"other","lineage":["https://openalex.org/I4210096112"]}],"countries":["NL"],"is_corresponding":false,"raw_author_name":"Minghao Guo","raw_affiliation_strings":["Department of Computer Science, Rutgers University"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Department of Computer Science, Rutgers University","institution_ids":["https://openalex.org/I4210096112"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5103921409","display_name":"Shuhang Lin","orcid":null},"institutions":[{"id":"https://openalex.org/I4210096112","display_name":"Rutgers Sexual and Reproductive Health and Rights","ror":"https://ror.org/00rcvgx40","country_code":"NL","type":"other","lineage":["https://openalex.org/I4210096112"]}],"countries":["NL"],"is_corresponding":false,"raw_author_name":"Shuhang Lin","raw_affiliation_strings":["Department of Computer Science, Rutgers University"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Department of Computer Science, Rutgers University","institution_ids":["https://openalex.org/I4210096112"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5066709505","display_name":"Y. Zhang","orcid":null},"institutions":[{"id":"https://openalex.org/I4210096112","display_name":"Rutgers Sexual and Reproductive Health and Rights","ror":"https://ror.org/00rcvgx40","country_code":"NL","type":"other","lineage":["https://openalex.org/I4210096112"]}],"countries":["NL"],"is_corresponding":false,"raw_author_name":"Yongfeng Zhang","raw_affiliation_strings":["Department of Computer Science, Rutgers University"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Department of Computer Science, Rutgers University","institution_ids":["https://openalex.org/I4210096112"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5100929110"],"corresponding_institution_ids":["https://openalex.org/I4210096112"],"apc_list":null,"apc_paid":null,"fwci":1.9057,"has_fulltext":false,"cited_by_count":1,"citation_normalized_percentile":{"value":0.9193409,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":95,"max":98},"biblio":{"volume":"27","issue":"2","first_page":"107","last_page":"116"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.21089999377727509,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.21089999377727509,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T14347","display_name":"Big Data and Digital Economy","score":0.2011999934911728,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.0746999979019165,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/router","display_name":"Router","score":0.5849999785423279},{"id":"https://openalex.org/keywords/minification","display_name":"Minification","score":0.5120000243186951},{"id":"https://openalex.org/keywords/scheduling","display_name":"Scheduling (production processes)","score":0.4830000102519989},{"id":"https://openalex.org/keywords/routing","display_name":"Routing (electronic design automation)","score":0.4629000127315521},{"id":"https://openalex.org/keywords/benchmark","display_name":"Benchmark (surveying)","score":0.43860000371932983},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.41519999504089355},{"id":"https://openalex.org/keywords/latency","display_name":"Latency (audio)","score":0.4108999967575073},{"id":"https://openalex.org/keywords/dual","display_name":"Dual (grammatical number)","score":0.37869998812675476},{"id":"https://openalex.org/keywords/greedy-algorithm","display_name":"Greedy algorithm","score":0.353300005197525}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7904000282287598},{"id":"https://openalex.org/C2775896111","wikidata":"https://www.wikidata.org/wiki/Q642560","display_name":"Router","level":2,"score":0.5849999785423279},{"id":"https://openalex.org/C147764199","wikidata":"https://www.wikidata.org/wiki/Q6865248","display_name":"Minification","level":2,"score":0.5120000243186951},{"id":"https://openalex.org/C120314980","wikidata":"https://www.wikidata.org/wiki/Q180634","display_name":"Distributed computing","level":1,"score":0.4860999882221222},{"id":"https://openalex.org/C206729178","wikidata":"https://www.wikidata.org/wiki/Q2271896","display_name":"Scheduling (production processes)","level":2,"score":0.4830000102519989},{"id":"https://openalex.org/C74172769","wikidata":"https://www.wikidata.org/wiki/Q1446839","display_name":"Routing (electronic design automation)","level":2,"score":0.4629000127315521},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.43860000371932983},{"id":"https://openalex.org/C126255220","wikidata":"https://www.wikidata.org/wiki/Q141495","display_name":"Mathematical optimization","level":1,"score":0.421099990606308},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.41519999504089355},{"id":"https://openalex.org/C82876162","wikidata":"https://www.wikidata.org/wiki/Q17096504","display_name":"Latency (audio)","level":2,"score":0.4108999967575073},{"id":"https://openalex.org/C2780980858","wikidata":"https://www.wikidata.org/wiki/Q110022","display_name":"Dual (grammatical number)","level":2,"score":0.37869998812675476},{"id":"https://openalex.org/C51823790","wikidata":"https://www.wikidata.org/wiki/Q504353","display_name":"Greedy algorithm","level":2,"score":0.353300005197525},{"id":"https://openalex.org/C98045186","wikidata":"https://www.wikidata.org/wiki/Q205663","display_name":"Process (computing)","level":2,"score":0.35109999775886536},{"id":"https://openalex.org/C206345919","wikidata":"https://www.wikidata.org/wiki/Q20380951","display_name":"Resource (disambiguation)","level":2,"score":0.31119999289512634},{"id":"https://openalex.org/C204948658","wikidata":"https://www.wikidata.org/wiki/Q1119410","display_name":"Static routing","level":4,"score":0.31119999289512634},{"id":"https://openalex.org/C8505890","wikidata":"https://www.wikidata.org/wiki/Q605095","display_name":"Budget constraint","level":2,"score":0.30730000138282776},{"id":"https://openalex.org/C124681953","wikidata":"https://www.wikidata.org/wiki/Q339062","display_name":"Decomposition","level":2,"score":0.29269999265670776},{"id":"https://openalex.org/C2776760102","wikidata":"https://www.wikidata.org/wiki/Q5139990","display_name":"Code (set theory)","level":3,"score":0.2842999994754791},{"id":"https://openalex.org/C66746571","wikidata":"https://www.wikidata.org/wiki/Q1134833","display_name":"ENCODE","level":3,"score":0.2768000066280365},{"id":"https://openalex.org/C2780609101","wikidata":"https://www.wikidata.org/wiki/Q17156588","display_name":"Resource management (computing)","level":2,"score":0.2703999876976013},{"id":"https://openalex.org/C2779530757","wikidata":"https://www.wikidata.org/wiki/Q1207505","display_name":"Quality (philosophy)","level":2,"score":0.2680000066757202},{"id":"https://openalex.org/C29202148","wikidata":"https://www.wikidata.org/wiki/Q287260","display_name":"Resource allocation","level":2,"score":0.26660001277923584},{"id":"https://openalex.org/C56086750","wikidata":"https://www.wikidata.org/wiki/Q6042592","display_name":"Integer programming","level":2,"score":0.2653999924659729},{"id":"https://openalex.org/C179799912","wikidata":"https://www.wikidata.org/wiki/Q205084","display_name":"Computational complexity theory","level":2,"score":0.2646999955177307},{"id":"https://openalex.org/C194648553","wikidata":"https://www.wikidata.org/wiki/Q1364774","display_name":"Spare part","level":2,"score":0.25440001487731934}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3787470.3787480","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3787470.3787480","pdf_url":null,"source":{"id":"https://openalex.org/S4210176598","display_name":"ACM SIGKDD Explorations Newsletter","issn_l":"1931-0145","issn":["1931-0145","1931-0153"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM SIGKDD Explorations Newsletter","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":6,"referenced_works":["https://openalex.org/W2093505886","https://openalex.org/W3021424624","https://openalex.org/W4385562507","https://openalex.org/W4387321091","https://openalex.org/W4404780931","https://openalex.org/W4415797314"],"related_works":[],"abstract_inverted_index":{"Large":[0],"language":[1,41],"models":[2,20,42,114,124],"(LLMs)":[3],"deliver":[4],"superior":[5],"performance":[6,133],"but":[7],"require":[8],"substantial":[9],"computational":[10,210],"resources":[11],"and":[12,146,155,223],"operate":[13],"with":[14,26,170],"relatively":[15],"low":[16],"efficiency,":[17],"while":[18,56,129,189,207],"smaller":[19],"can":[21],"efficiently":[22],"handle":[23],"simpler":[24],"tasks":[25],"fewer":[27],"resources.":[28],"LLMrouting":[29],"is":[30,140],"a":[31,44,67,98,119,136,159],"crucial":[32],"paradigm":[33],"that":[34,125,165,197],"dynamically":[35,182],"selects":[36],"the":[37,73,115,131,144,152,177,224],"most":[38],"suitable":[39],"large":[40],"from":[43],"pool":[45],"of":[46,108,148],"candidates":[47],"to":[48,142,191,201,217],"process":[49],"diverse":[50],"inputs,":[51],"ensuring":[52,130],"optimal":[53,69,179],"resource":[54,89],"utilization":[55],"maintaining":[57],"response":[58,205],"quality.":[59],"Existing":[60],"routing":[61,101,116],"frameworks":[62],"typically":[63],"model":[64],"this":[65,93],"as":[66,118],"locally":[68],"decision-making":[70],"problem,":[71,94,122],"selecting":[72],"presumed":[74],"best-fit":[75],"LLM":[76,105],"for":[77,103,162],"each":[78],"query":[79],"individually,":[80],"which":[81],"overlooks":[82],"global":[83],"budget":[84],"constraints,":[85],"resulting":[86],"in":[87,204],"ineffective":[88],"allocation.":[90],"To":[91],"tackle":[92],"we":[95,157],"introduce":[96],"OmniRouter,":[97],"fundamentally":[99],"controllable":[100],"framework":[102],"multi-":[104],"serving.":[106],"Instead":[107],"making":[109],"per-query":[110],"greedy":[111],"choices,":[112],"OmniRouter":[113,198],"task":[117],"constrained":[120,160],"optimization":[121],"assigning":[123],"minimize":[126],"total":[127],"cost":[128,154],"required":[132],"level.":[134],"Specifically,":[135],"hybrid":[137],"retrieval-augmented":[138],"predictor":[139],"designed":[141],"predict":[143],"capabilities":[145],"costs":[147,211],"LLMs.":[149],"After":[150],"obtaining":[151],"predicted":[153],"performance,":[156],"utilize":[158],"optimizer":[161],"cost-optimal":[163],"assignments":[164],"employs":[166],"Lagrangian":[167],"dual":[168],"decomposition":[169],"adaptive":[171],"multipliers.":[172],"It":[173],"iteratively":[174],"converges":[175],"toward":[176],"globally":[178],"query-model":[180],"allocation,":[181],"balancing":[183],"latency":[184],"minimization":[185],"against":[186],"quality":[187],"thresholds":[188],"adhering":[190],"heterogeneous":[192],"capacity":[193],"constraints.":[194],"Experiments":[195],"show":[196],"achieves":[199],"up":[200],"6.30%":[202],"improvement":[203],"accuracy":[206],"simultaneously":[208],"reducing":[209],"by":[212],"at":[213,228],"least":[214],"10.15%":[215],"compared":[216],"competitive":[218],"router":[219],"baselines.":[220],"The":[221],"code":[222],"dataset":[225],"are":[226],"available":[227],"https:":[229],"//github.com/dongyuanjushi/OmniRouter.":[230]},"counts_by_year":[{"year":2026,"cited_by_count":1}],"updated_date":"2026-05-21T06:26:12.895304","created_date":"2026-01-01T00:00:00"}
