{"id":"https://openalex.org/W7116870306","doi":"https://doi.org/10.1145/3772429.3772445","title":"Beyond GPT-5: Making LLMs Cheaper and Better via Performance-Efficiency Optimized Routing","display_name":"Beyond GPT-5: Making LLMs Cheaper and Better via Performance-Efficiency Optimized Routing","publication_year":2025,"publication_date":"2025-11-21","ids":{"openalex":"https://openalex.org/W7116870306","doi":"https://doi.org/10.1145/3772429.3772445"},"language":null,"primary_location":{"id":"doi:10.1145/3772429.3772445","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3772429.3772445","pdf_url":null,"source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2025 The Seventh International Conference on Distributed Artificial Intelligence","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://doi.org/10.1145/3772429.3772445","any_repository_has_fulltext":null},"authorships":[{"author_position":"first","author":{"id":null,"display_name":"Yiqun Zhang","orcid":"https://orcid.org/0009-0007-0423-5746"},"institutions":[{"id":"https://openalex.org/I4210100255","display_name":"Beijing Academy of Artificial Intelligence","ror":"https://ror.org/016a74861","country_code":"CN","type":"other","lineage":["https://openalex.org/I4210100255"]},{"id":"https://openalex.org/I4391012619","display_name":"Shanghai Artificial Intelligence Laboratory","ror":"https://ror.org/03wkvpx79","country_code":null,"type":"facility","lineage":["https://openalex.org/I4391012619"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Yiqun Zhang","raw_affiliation_strings":["Shanghai Artificial Intelligence Laboratory, Shanghai, China"],"raw_orcid":"https://orcid.org/0009-0007-0423-5746","affiliations":[{"raw_affiliation_string":"Shanghai Artificial Intelligence Laboratory, Shanghai, China","institution_ids":["https://openalex.org/I4210100255","https://openalex.org/I4391012619"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5121109134","display_name":"Hao Li","orcid":null},"institutions":[{"id":"https://openalex.org/I4210100255","display_name":"Beijing Academy of Artificial Intelligence","ror":"https://ror.org/016a74861","country_code":"CN","type":"other","lineage":["https://openalex.org/I4210100255"]},{"id":"https://openalex.org/I4391012619","display_name":"Shanghai Artificial Intelligence Laboratory","ror":"https://ror.org/03wkvpx79","country_code":null,"type":"facility","lineage":["https://openalex.org/I4391012619"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Hao Li","raw_affiliation_strings":["Shanghai Artificial Intelligence Laboratory, Shanghai, China"],"raw_orcid":"https://orcid.org/0009-0002-0090-4870","affiliations":[{"raw_affiliation_string":"Shanghai Artificial Intelligence Laboratory, Shanghai, China","institution_ids":["https://openalex.org/I4210100255","https://openalex.org/I4391012619"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Jianhao Chen","orcid":"https://orcid.org/0009-0006-4363-0549"},"institutions":[{"id":"https://openalex.org/I4210100255","display_name":"Beijing Academy of Artificial Intelligence","ror":"https://ror.org/016a74861","country_code":"CN","type":"other","lineage":["https://openalex.org/I4210100255"]},{"id":"https://openalex.org/I4391012619","display_name":"Shanghai Artificial Intelligence Laboratory","ror":"https://ror.org/03wkvpx79","country_code":null,"type":"facility","lineage":["https://openalex.org/I4391012619"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jianhao Chen","raw_affiliation_strings":["Shanghai Artificial Intelligence Laboratory, Shanghai, China"],"raw_orcid":"https://orcid.org/0009-0006-4363-0549","affiliations":[{"raw_affiliation_string":"Shanghai Artificial Intelligence Laboratory, Shanghai, China","institution_ids":["https://openalex.org/I4210100255","https://openalex.org/I4391012619"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5121118669","display_name":"Hangfan Zhang","orcid":null},"institutions":[{"id":"https://openalex.org/I4210100255","display_name":"Beijing Academy of Artificial Intelligence","ror":"https://ror.org/016a74861","country_code":"CN","type":"other","lineage":["https://openalex.org/I4210100255"]},{"id":"https://openalex.org/I4391012619","display_name":"Shanghai Artificial Intelligence Laboratory","ror":"https://ror.org/03wkvpx79","country_code":null,"type":"facility","lineage":["https://openalex.org/I4391012619"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Hangfan Zhang","raw_affiliation_strings":["Shanghai Artificial Intelligence Laboratory, Shanghai, China"],"raw_orcid":"https://orcid.org/0009-0003-9249-7498","affiliations":[{"raw_affiliation_string":"Shanghai Artificial Intelligence Laboratory, Shanghai, China","institution_ids":["https://openalex.org/I4210100255","https://openalex.org/I4391012619"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Peng Ye","orcid":"https://orcid.org/0000-0002-8486-7562"},"institutions":[{"id":"https://openalex.org/I4210100255","display_name":"Beijing Academy of Artificial Intelligence","ror":"https://ror.org/016a74861","country_code":"CN","type":"other","lineage":["https://openalex.org/I4210100255"]},{"id":"https://openalex.org/I4391012619","display_name":"Shanghai Artificial Intelligence Laboratory","ror":"https://ror.org/03wkvpx79","country_code":null,"type":"facility","lineage":["https://openalex.org/I4391012619"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Peng Ye","raw_affiliation_strings":["Shanghai Artificial Intelligence Laboratory, Shanghai, China"],"raw_orcid":"https://orcid.org/0000-0002-8486-7562","affiliations":[{"raw_affiliation_string":"Shanghai Artificial Intelligence Laboratory, Shanghai, China","institution_ids":["https://openalex.org/I4210100255","https://openalex.org/I4391012619"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Lei Bai","orcid":"https://orcid.org/0000-0001-8968-3386"},"institutions":[{"id":"https://openalex.org/I4210100255","display_name":"Beijing Academy of Artificial Intelligence","ror":"https://ror.org/016a74861","country_code":"CN","type":"other","lineage":["https://openalex.org/I4210100255"]},{"id":"https://openalex.org/I4391012619","display_name":"Shanghai Artificial Intelligence Laboratory","ror":"https://ror.org/03wkvpx79","country_code":null,"type":"facility","lineage":["https://openalex.org/I4391012619"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Lei Bai","raw_affiliation_strings":["Shanghai Artificial Intelligence Laboratory, Shanghai, China"],"raw_orcid":"https://orcid.org/0000-0001-8968-3386","affiliations":[{"raw_affiliation_string":"Shanghai Artificial Intelligence Laboratory, Shanghai, China","institution_ids":["https://openalex.org/I4210100255","https://openalex.org/I4391012619"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5052387391","display_name":"Shuyue Hu","orcid":"https://orcid.org/0000-0002-1908-1344"},"institutions":[{"id":"https://openalex.org/I4210100255","display_name":"Beijing Academy of Artificial Intelligence","ror":"https://ror.org/016a74861","country_code":"CN","type":"other","lineage":["https://openalex.org/I4210100255"]},{"id":"https://openalex.org/I4391012619","display_name":"Shanghai Artificial Intelligence Laboratory","ror":"https://ror.org/03wkvpx79","country_code":null,"type":"facility","lineage":["https://openalex.org/I4391012619"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Shuyue Hu","raw_affiliation_strings":["Shanghai Artificial Intelligence Laboratory, Shanghai, China"],"raw_orcid":"https://orcid.org/0000-0002-1908-1344","affiliations":[{"raw_affiliation_string":"Shanghai Artificial Intelligence Laboratory, Shanghai, China","institution_ids":["https://openalex.org/I4210100255","https://openalex.org/I4391012619"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":7,"corresponding_author_ids":[],"corresponding_institution_ids":["https://openalex.org/I4210100255","https://openalex.org/I4391012619"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.79704131,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"122","last_page":"129"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.18150000274181366,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.18150000274181366,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.17440000176429749,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11636","display_name":"Artificial Intelligence in Healthcare and Education","score":0.09049999713897705,"subfield":{"id":"https://openalex.org/subfields/2718","display_name":"Health Informatics"},"field":{"id":"https://openalex.org/fields/27","display_name":"Medicine"},"domain":{"id":"https://openalex.org/domains/4","display_name":"Health Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/routing","display_name":"Routing (electronic design automation)","score":0.5787000060081482},{"id":"https://openalex.org/keywords/code","display_name":"Code (set theory)","score":0.5695000290870667},{"id":"https://openalex.org/keywords/pareto-principle","display_name":"Pareto principle","score":0.4512999951839447},{"id":"https://openalex.org/keywords/routing-algorithm","display_name":"Routing algorithm","score":0.3131999969482422},{"id":"https://openalex.org/keywords/running-time","display_name":"Running time","score":0.2612999975681305}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6801999807357788},{"id":"https://openalex.org/C74172769","wikidata":"https://www.wikidata.org/wiki/Q1446839","display_name":"Routing (electronic design automation)","level":2,"score":0.5787000060081482},{"id":"https://openalex.org/C2776760102","wikidata":"https://www.wikidata.org/wiki/Q5139990","display_name":"Code (set theory)","level":3,"score":0.5695000290870667},{"id":"https://openalex.org/C137635306","wikidata":"https://www.wikidata.org/wiki/Q182667","display_name":"Pareto principle","level":2,"score":0.4512999951839447},{"id":"https://openalex.org/C2984173633","wikidata":"https://www.wikidata.org/wiki/Q22725","display_name":"Routing algorithm","level":4,"score":0.3131999969482422},{"id":"https://openalex.org/C120314980","wikidata":"https://www.wikidata.org/wiki/Q180634","display_name":"Distributed computing","level":1,"score":0.2865999937057495},{"id":"https://openalex.org/C3017489831","wikidata":"https://www.wikidata.org/wiki/Q2393193","display_name":"Running time","level":2,"score":0.2612999975681305},{"id":"https://openalex.org/C126255220","wikidata":"https://www.wikidata.org/wiki/Q141495","display_name":"Mathematical optimization","level":1,"score":0.260699987411499},{"id":"https://openalex.org/C42475967","wikidata":"https://www.wikidata.org/wiki/Q194292","display_name":"Operations research","level":1,"score":0.2605000138282776},{"id":"https://openalex.org/C109332788","wikidata":"https://www.wikidata.org/wiki/Q615445","display_name":"Economic efficiency","level":2,"score":0.24729999899864197}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3772429.3772445","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3772429.3772445","pdf_url":null,"source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2025 The Seventh International Conference on Distributed Artificial Intelligence","raw_type":"proceedings-article"}],"best_oa_location":{"id":"doi:10.1145/3772429.3772445","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3772429.3772445","pdf_url":null,"source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2025 The Seventh International Conference on Distributed Artificial Intelligence","raw_type":"proceedings-article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":6,"referenced_works":["https://openalex.org/W4385571189","https://openalex.org/W4401043635","https://openalex.org/W4402684003","https://openalex.org/W4409348154","https://openalex.org/W4415797314","https://openalex.org/W4416035026"],"related_works":[],"abstract_inverted_index":{"Balancing":[0],"performance":[1,135],"and":[2,49,62,83,89,129,158],"efficiency":[3],"is":[4,171],"a":[5,28,39,52,76,96,146],"central":[6],"challenge":[7],"in":[8,110],"large":[9],"language":[10],"model":[11,30,73,106,124],"(LLM)":[12],"advancement.":[13],"GPT-5":[14],"addresses":[15],"this":[16,34],"with":[17],"test-time":[18,40],"routing,":[19],"dynamically":[20],"assigning":[21],"queries":[22],"to":[23,69],"either":[24],"an":[25],"efficient":[26],"or":[27],"high-capacity":[29],"during":[31],"inference.":[32],"In":[33],"work,":[35],"we":[36],"present":[37],"Avengers-Pro,":[38],"routing":[41],"framework":[42],"that":[43,134],"ensembles":[44],"LLMs":[45],"of":[46,120,133],"varying":[47,95],"capacities":[48],"efficiencies,":[50],"providing":[51],"unified":[53],"solution":[54],"for":[55,154,162],"all":[56,167],"performance-efficiency":[57,77,97],"tradeoffs.":[58],"The":[59],"Avengers-Pro":[60],"embeds":[61],"clusters":[63],"incoming":[64],"queries,":[65],"then":[66],"routes":[67],"each":[68],"the":[70,103,117,121,151,159],"most":[71],"suitable":[72],"based":[74],"on":[75],"score.":[78],"Across":[79],"6":[80],"challenging":[81],"benchmarks":[82],"8":[84],"leading":[85],"models\u2014including":[86],"GPT-5-medium,":[87],"Gemini-2.5-pro,":[88],"Claude-opus-4.1\u2014Avengers-Pro":[90],"achieves":[91,145],"state-of-the-art":[92],"results:":[93],"by":[94,108],"trade-off":[98],"parameter,":[99],"it":[100,114,144],"can":[101,115],"surpass":[102],"strongest":[104,122],"single":[105,123,168],"(GPT-5-medium)":[107],"+7%":[109],"average":[111,118],"accuracy.":[112],"Moreover,":[113],"match":[116],"accuracy":[119,153],"at":[125,136,173],"27%":[126],"lower":[127,138],"cost,":[128,157],"reach":[130],"\u223c":[131],"90%":[132],"63%":[137],"cost.":[139],"Last":[140],"but":[141],"not":[142],"least,":[143],"Pareto":[147],"frontier,":[148],"consistently":[149],"yielding":[150],"highest":[152],"any":[155,163],"given":[156,164],"lowest":[160],"cost":[161],"accuracy,":[165],"among":[166],"models.":[169],"Code":[170],"available":[172],"https://github.com/ZhangYiqun018/AvengersPro.":[174]},"counts_by_year":[],"updated_date":"2025-12-23T23:15:37.779995","created_date":"2025-12-23T00:00:00"}
