{"id":"https://openalex.org/W7124909593","doi":"https://doi.org/10.1109/cloudcom67567.2025.11331464","title":"System-Aware LLM Arena: Benchmarking Utility, Latency, and Cost","display_name":"System-Aware LLM Arena: Benchmarking Utility, Latency, and Cost","publication_year":2025,"publication_date":"2025-11-14","ids":{"openalex":"https://openalex.org/W7124909593","doi":"https://doi.org/10.1109/cloudcom67567.2025.11331464"},"language":null,"primary_location":{"id":"doi:10.1109/cloudcom67567.2025.11331464","is_oa":false,"landing_page_url":"https://doi.org/10.1109/cloudcom67567.2025.11331464","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 lEEE International Conference on Cloud Computing Technology and Science (CloudCom)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5084922477","display_name":"Renwen Ma","orcid":null},"institutions":[{"id":"https://openalex.org/I182722699","display_name":"Shenzhen Polytechnic University","ror":"https://ror.org/00d2w9g53","country_code":"CN","type":"education","lineage":["https://openalex.org/I182722699"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Renwen Ma","raw_affiliation_strings":["Shenzhen Polytechnic University,Shenzhen,China"],"affiliations":[{"raw_affiliation_string":"Shenzhen Polytechnic University,Shenzhen,China","institution_ids":["https://openalex.org/I182722699"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5123371012","display_name":"Songrui Wang","orcid":null},"institutions":[{"id":"https://openalex.org/I4210116924","display_name":"Chinese University of Hong Kong, Shenzhen","ror":"https://ror.org/02d5ks197","country_code":"CN","type":"education","lineage":["https://openalex.org/I177725633","https://openalex.org/I180726961","https://openalex.org/I4210116924"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Songrui Wang","raw_affiliation_strings":["The Chinese University of Hong Kong, Shenzhen,Shenzhen,China"],"affiliations":[{"raw_affiliation_string":"The Chinese University of Hong Kong, Shenzhen,Shenzhen,China","institution_ids":["https://openalex.org/I4210116924"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5123392813","display_name":"Yicheng Yu","orcid":null},"institutions":[{"id":"https://openalex.org/I55712492","display_name":"Zhejiang University of Technology","ror":"https://ror.org/02djqfd08","country_code":"CN","type":"education","lineage":["https://openalex.org/I55712492"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yicheng Yu","raw_affiliation_strings":["Zhejiang University of Technology,Hangzhou,China"],"affiliations":[{"raw_affiliation_string":"Zhejiang University of Technology,Hangzhou,China","institution_ids":["https://openalex.org/I55712492"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5123440821","display_name":"Tongkai Ji","orcid":null},"institutions":[{"id":"https://openalex.org/I55712492","display_name":"Zhejiang University of Technology","ror":"https://ror.org/02djqfd08","country_code":"CN","type":"education","lineage":["https://openalex.org/I55712492"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Tongkai Ji","raw_affiliation_strings":["Zhejiang University of Technology,Hangzhou,China"],"affiliations":[{"raw_affiliation_string":"Zhejiang University of Technology,Hangzhou,China","institution_ids":["https://openalex.org/I55712492"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5112864283","display_name":"Wei-Ang Dai","orcid":"https://orcid.org/0009-0003-6938-5228"},"institutions":[{"id":"https://openalex.org/I55712492","display_name":"Zhejiang University of Technology","ror":"https://ror.org/02djqfd08","country_code":"CN","type":"education","lineage":["https://openalex.org/I55712492"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Wei Dai","raw_affiliation_strings":["Zhejiang University of Technology,Hangzhou,China"],"affiliations":[{"raw_affiliation_string":"Zhejiang University of Technology,Hangzhou,China","institution_ids":["https://openalex.org/I55712492"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5084922477"],"corresponding_institution_ids":["https://openalex.org/I182722699"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.72997802,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"8"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12127","display_name":"Software System Performance and Reliability","score":0.17030000686645508,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T12127","display_name":"Software System Performance and Reliability","score":0.17030000686645508,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10273","display_name":"IoT and Edge/Fog Computing","score":0.13899999856948853,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T14347","display_name":"Big Data and Digital Economy","score":0.11110000312328339,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/benchmarking","display_name":"Benchmarking","score":0.8374000191688538},{"id":"https://openalex.org/keywords/benchmark","display_name":"Benchmark (surveying)","score":0.7610999941825867},{"id":"https://openalex.org/keywords/cloud-computing","display_name":"Cloud computing","score":0.6061999797821045},{"id":"https://openalex.org/keywords/software-deployment","display_name":"Software deployment","score":0.4887000024318695},{"id":"https://openalex.org/keywords/government","display_name":"Government (linguistics)","score":0.3962000012397766},{"id":"https://openalex.org/keywords/pareto-principle","display_name":"Pareto principle","score":0.3573000133037567},{"id":"https://openalex.org/keywords/service-provider","display_name":"Service provider","score":0.3508000075817108}],"concepts":[{"id":"https://openalex.org/C86251818","wikidata":"https://www.wikidata.org/wiki/Q816754","display_name":"Benchmarking","level":2,"score":0.8374000191688538},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.7610999941825867},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6855000257492065},{"id":"https://openalex.org/C79974875","wikidata":"https://www.wikidata.org/wiki/Q483639","display_name":"Cloud computing","level":2,"score":0.6061999797821045},{"id":"https://openalex.org/C105339364","wikidata":"https://www.wikidata.org/wiki/Q2297740","display_name":"Software deployment","level":2,"score":0.4887000024318695},{"id":"https://openalex.org/C2778137410","wikidata":"https://www.wikidata.org/wiki/Q2732820","display_name":"Government (linguistics)","level":2,"score":0.3962000012397766},{"id":"https://openalex.org/C42475967","wikidata":"https://www.wikidata.org/wiki/Q194292","display_name":"Operations research","level":1,"score":0.3939000070095062},{"id":"https://openalex.org/C137635306","wikidata":"https://www.wikidata.org/wiki/Q182667","display_name":"Pareto principle","level":2,"score":0.3573000133037567},{"id":"https://openalex.org/C116537","wikidata":"https://www.wikidata.org/wiki/Q2169973","display_name":"Service provider","level":3,"score":0.3508000075817108},{"id":"https://openalex.org/C95821633","wikidata":"https://www.wikidata.org/wiki/Q973302","display_name":"Cost driver","level":2,"score":0.3172000050544739},{"id":"https://openalex.org/C2780378061","wikidata":"https://www.wikidata.org/wiki/Q25351891","display_name":"Service (business)","level":2,"score":0.30649998784065247},{"id":"https://openalex.org/C2522767166","wikidata":"https://www.wikidata.org/wiki/Q2374463","display_name":"Data science","level":1,"score":0.3052999973297119},{"id":"https://openalex.org/C112930515","wikidata":"https://www.wikidata.org/wiki/Q4389547","display_name":"Risk analysis (engineering)","level":1,"score":0.3003000020980835},{"id":"https://openalex.org/C164624739","wikidata":"https://www.wikidata.org/wiki/Q754331","display_name":"Activity-based costing","level":2,"score":0.2973000109195709},{"id":"https://openalex.org/C162307627","wikidata":"https://www.wikidata.org/wiki/Q204833","display_name":"Enhanced Data Rates for GSM Evolution","level":2,"score":0.2971000075340271},{"id":"https://openalex.org/C76178495","wikidata":"https://www.wikidata.org/wiki/Q4808784","display_name":"Asset (computer security)","level":2,"score":0.2924000024795532},{"id":"https://openalex.org/C68781425","wikidata":"https://www.wikidata.org/wiki/Q2052203","display_name":"Multi-objective optimization","level":2,"score":0.29019999504089355},{"id":"https://openalex.org/C82876162","wikidata":"https://www.wikidata.org/wiki/Q17096504","display_name":"Latency (audio)","level":2,"score":0.28929999470710754},{"id":"https://openalex.org/C38652104","wikidata":"https://www.wikidata.org/wiki/Q3510521","display_name":"Computer security","level":1,"score":0.2718999981880188}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/cloudcom67567.2025.11331464","is_oa":false,"landing_page_url":"https://doi.org/10.1109/cloudcom67567.2025.11331464","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 lEEE International Conference on Cloud Computing Technology and Science (CloudCom)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":6,"referenced_works":["https://openalex.org/W1982063824","https://openalex.org/W2153975459","https://openalex.org/W2763110165","https://openalex.org/W3037032032","https://openalex.org/W4387321091","https://openalex.org/W4401042689"],"related_works":[],"abstract_inverted_index":{"Cloud":[0],"providers":[1,143],"increasingly":[2],"face":[3],"the":[4,34,100,124],"chal-lenge":[5],"of":[6,104],"selecting":[7],"among":[8],"rapidly":[9],"evolving":[10],"large":[11],"language":[12],"models":[13,111,119],"(LLMs)":[14],"for":[15,140],"deployment":[16],"at":[17],"scale.":[18],"Existing":[19],"benchmarks,":[20],"typically":[21],"based":[22],"on":[23],"human":[24],"or":[25],"crowd":[26],"preferences,":[27],"advance":[28],"model":[29],"evaluation":[30],"but":[31,167],"rarely":[32],"cap-ture":[33],"system-level":[35],"constraints":[36],"critical":[37],"to":[38,133,144],"cloud-based":[39],"deployability.":[40],"We":[41,81],"present":[42],"a":[43,84],"system-aware":[44],"LLM":[45],"Arena":[46],"benchmark":[47,60,125],"that":[48,88],"combines":[49],"calibrated":[50],"LLM-as-a-judge":[51],"scores":[52],"with":[53,65,120],"runtime":[54],"telemetry":[55],"and":[56,67,75,78,96,136,146,151,164,169,175],"cost":[57,77,152],"signals.":[58],"The":[59],"measures":[61],"judged":[62],"utility":[63],"together":[64],"end-to-end":[66],"tail":[68],"latency,":[69],"stability,":[70],"memory":[71],"usage,":[72],"hard-ware":[73],"utilization,":[74],"per-request":[76],"energy":[79],"esti-mates.":[80],"further":[82],"introduce":[83],"scene-aware":[85],"composite":[86],"score":[87],"makes":[89],"latency-cost":[90],"trade-offs":[91],"explicit":[92],"under":[93,149],"real-time,":[94],"budget-constrained,":[95],"balanced":[97],"scenarios,":[98,166],"reflecting":[99],"practical":[101],"decision-making":[102],"needs":[103],"cloud":[105],"providers.":[106],"Pareto":[107],"frontiers":[108],"visualize":[109],"non-dominated":[110],"across":[112],"these":[113],"dimensions.":[114],"Using":[115],"locally":[116],"deployed":[117],"open-source":[118],"DeepSeek-R1":[121],"as":[122],"judge,":[123],"demonstrates":[126],"how":[127],"system":[128],"factors":[129],"reshape":[130],"leaderboards":[131],"compared":[132],"preference-only":[134],"views,":[135],"provides":[137],"actionable":[138],"guidance":[139],"cloud/edge":[141],"service":[142],"select":[145],"deploy":[147],"LLMs":[148],"latency":[150],"constraints.":[153],"For":[154],"example,":[155],"in":[156,162,173],"our":[157],"experiments":[158],"GLM":[159],"ranked":[160],"highest":[161],"edge":[163],"government":[165],"Qwen3_4B":[168],"Llama3_1B":[170],"surpassed":[171],"it":[172],"industrial":[174],"customer":[176],"settings.":[177]},"counts_by_year":[],"updated_date":"2026-01-22T23:29:09.771500","created_date":"2026-01-21T00:00:00"}
