{"id":"https://openalex.org/W4414198859","doi":"https://doi.org/10.1109/dac63849.2025.11132617","title":"Tropical: Enhancing SLO Attainment in Disaggregated LLM Serving via SLO-Aware Multiplexing","display_name":"Tropical: Enhancing SLO Attainment in Disaggregated LLM Serving via SLO-Aware Multiplexing","publication_year":2025,"publication_date":"2025-06-22","ids":{"openalex":"https://openalex.org/W4414198859","doi":"https://doi.org/10.1109/dac63849.2025.11132617"},"language":"en","primary_location":{"id":"doi:10.1109/dac63849.2025.11132617","is_oa":false,"landing_page_url":"https://doi.org/10.1109/dac63849.2025.11132617","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 62nd ACM/IEEE Design Automation Conference (DAC)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5085852932","display_name":"Jinming Ma","orcid":"https://orcid.org/0009-0000-1351-1377"},"institutions":[{"id":"https://openalex.org/I4391012619","display_name":"Shanghai Artificial Intelligence Laboratory","ror":"https://ror.org/03wkvpx79","country_code":null,"type":"facility","lineage":["https://openalex.org/I4391012619"]},{"id":"https://openalex.org/I4210100255","display_name":"Beijing Academy of Artificial Intelligence","ror":"https://ror.org/016a74861","country_code":"CN","type":"other","lineage":["https://openalex.org/I4210100255"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Jinming Ma","raw_affiliation_strings":["Shanghai Artificial Intelligence Laboratory"],"affiliations":[{"raw_affiliation_string":"Shanghai Artificial Intelligence Laboratory","institution_ids":["https://openalex.org/I4210100255","https://openalex.org/I4391012619"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5081670242","display_name":"Jinfan Chen","orcid":"https://orcid.org/0000-0002-3399-1109"},"institutions":[{"id":"https://openalex.org/I4391012619","display_name":"Shanghai Artificial Intelligence Laboratory","ror":"https://ror.org/03wkvpx79","country_code":null,"type":"facility","lineage":["https://openalex.org/I4391012619"]},{"id":"https://openalex.org/I4210100255","display_name":"Beijing Academy of Artificial Intelligence","ror":"https://ror.org/016a74861","country_code":"CN","type":"other","lineage":["https://openalex.org/I4210100255"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jiefei Chen","raw_affiliation_strings":["Shanghai Artificial Intelligence Laboratory"],"affiliations":[{"raw_affiliation_string":"Shanghai Artificial Intelligence Laboratory","institution_ids":["https://openalex.org/I4210100255","https://openalex.org/I4391012619"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100663883","display_name":"Xiuhong Li","orcid":"https://orcid.org/0000-0001-8482-2623"},"institutions":[{"id":"https://openalex.org/I20231570","display_name":"Peking University","ror":"https://ror.org/02v51f717","country_code":"CN","type":"education","lineage":["https://openalex.org/I20231570"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xiuhong Li","raw_affiliation_strings":["Peking University"],"affiliations":[{"raw_affiliation_string":"Peking University","institution_ids":["https://openalex.org/I20231570"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5076729908","display_name":"Jiangfei Duan","orcid":"https://orcid.org/0000-0002-6327-2033"},"institutions":[{"id":"https://openalex.org/I4210100255","display_name":"Beijing Academy of Artificial Intelligence","ror":"https://ror.org/016a74861","country_code":"CN","type":"other","lineage":["https://openalex.org/I4210100255"]},{"id":"https://openalex.org/I4391012619","display_name":"Shanghai Artificial Intelligence Laboratory","ror":"https://ror.org/03wkvpx79","country_code":null,"type":"facility","lineage":["https://openalex.org/I4391012619"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jiangfei Duan","raw_affiliation_strings":["Shanghai Artificial Intelligence Laboratory"],"affiliations":[{"raw_affiliation_string":"Shanghai Artificial Intelligence Laboratory","institution_ids":["https://openalex.org/I4210100255","https://openalex.org/I4391012619"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5113747055","display_name":"H Duanmu","orcid":null},"institutions":[{"id":"https://openalex.org/I4210100255","display_name":"Beijing Academy of Artificial Intelligence","ror":"https://ror.org/016a74861","country_code":"CN","type":"other","lineage":["https://openalex.org/I4210100255"]},{"id":"https://openalex.org/I4391012619","display_name":"Shanghai Artificial Intelligence Laboratory","ror":"https://ror.org/03wkvpx79","country_code":null,"type":"facility","lineage":["https://openalex.org/I4391012619"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Haojie Duanmu","raw_affiliation_strings":["Shanghai Artificial Intelligence Laboratory"],"affiliations":[{"raw_affiliation_string":"Shanghai Artificial Intelligence Laboratory","institution_ids":["https://openalex.org/I4210100255","https://openalex.org/I4391012619"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5052952011","display_name":"Xingcheng Zhang","orcid":null},"institutions":[{"id":"https://openalex.org/I4210100255","display_name":"Beijing Academy of Artificial Intelligence","ror":"https://ror.org/016a74861","country_code":"CN","type":"other","lineage":["https://openalex.org/I4210100255"]},{"id":"https://openalex.org/I4391012619","display_name":"Shanghai Artificial Intelligence Laboratory","ror":"https://ror.org/03wkvpx79","country_code":null,"type":"facility","lineage":["https://openalex.org/I4391012619"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xingcheng Zhang","raw_affiliation_strings":["Shanghai Artificial Intelligence Laboratory"],"affiliations":[{"raw_affiliation_string":"Shanghai Artificial Intelligence Laboratory","institution_ids":["https://openalex.org/I4210100255","https://openalex.org/I4391012619"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5005718995","display_name":"Chao Yang","orcid":"https://orcid.org/0000-0001-7426-6248"},"institutions":[{"id":"https://openalex.org/I20231570","display_name":"Peking University","ror":"https://ror.org/02v51f717","country_code":"CN","type":"education","lineage":["https://openalex.org/I20231570"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Chao Yang","raw_affiliation_strings":["Peking University"],"affiliations":[{"raw_affiliation_string":"Peking University","institution_ids":["https://openalex.org/I20231570"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5010087030","display_name":"Dahua Lin","orcid":"https://orcid.org/0000-0002-8865-7896"},"institutions":[{"id":"https://openalex.org/I4210100255","display_name":"Beijing Academy of Artificial Intelligence","ror":"https://ror.org/016a74861","country_code":"CN","type":"other","lineage":["https://openalex.org/I4210100255"]},{"id":"https://openalex.org/I4391012619","display_name":"Shanghai Artificial Intelligence Laboratory","ror":"https://ror.org/03wkvpx79","country_code":null,"type":"facility","lineage":["https://openalex.org/I4391012619"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Dahua Lin","raw_affiliation_strings":["Shanghai Artificial Intelligence Laboratory"],"affiliations":[{"raw_affiliation_string":"Shanghai Artificial Intelligence Laboratory","institution_ids":["https://openalex.org/I4210100255","https://openalex.org/I4391012619"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":8,"corresponding_author_ids":["https://openalex.org/A5085852932"],"corresponding_institution_ids":["https://openalex.org/I4210100255","https://openalex.org/I4391012619"],"apc_list":null,"apc_paid":null,"fwci":1.2582,"has_fulltext":false,"cited_by_count":1,"citation_normalized_percentile":{"value":0.8299961,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":97,"max":99},"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"7"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11192","display_name":"Underwater Vehicles and Communication Systems","score":0.9817000031471252,"subfield":{"id":"https://openalex.org/subfields/2212","display_name":"Ocean Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11192","display_name":"Underwater Vehicles and Communication Systems","score":0.9817000031471252,"subfield":{"id":"https://openalex.org/subfields/2212","display_name":"Ocean Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10796","display_name":"Cooperative Communication and Network Coding","score":0.9248999953269958,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10326","display_name":"Indoor and Outdoor Localization Technologies","score":0.9114000201225281,"subfield":{"id":"https://openalex.org/subfields/2208","display_name":"Electrical and Electronic Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/multiplexing","display_name":"Multiplexing","score":0.5658000111579895},{"id":"https://openalex.org/keywords/quality-of-service","display_name":"Quality of service","score":0.5425000190734863},{"id":"https://openalex.org/keywords/queueing-theory","display_name":"Queueing theory","score":0.507099986076355},{"id":"https://openalex.org/keywords/leverage","display_name":"Leverage (statistics)","score":0.48260000348091125},{"id":"https://openalex.org/keywords/latency","display_name":"Latency (audio)","score":0.37209999561309814},{"id":"https://openalex.org/keywords/turnaround-time","display_name":"Turnaround time","score":0.36469998955726624}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7634000182151794},{"id":"https://openalex.org/C19275194","wikidata":"https://www.wikidata.org/wiki/Q222903","display_name":"Multiplexing","level":2,"score":0.5658000111579895},{"id":"https://openalex.org/C5119721","wikidata":"https://www.wikidata.org/wiki/Q220501","display_name":"Quality of service","level":2,"score":0.5425000190734863},{"id":"https://openalex.org/C31258907","wikidata":"https://www.wikidata.org/wiki/Q1301371","display_name":"Computer network","level":1,"score":0.5424000024795532},{"id":"https://openalex.org/C22684755","wikidata":"https://www.wikidata.org/wiki/Q847526","display_name":"Queueing theory","level":2,"score":0.507099986076355},{"id":"https://openalex.org/C153083717","wikidata":"https://www.wikidata.org/wiki/Q6535263","display_name":"Leverage (statistics)","level":2,"score":0.48260000348091125},{"id":"https://openalex.org/C82876162","wikidata":"https://www.wikidata.org/wiki/Q17096504","display_name":"Latency (audio)","level":2,"score":0.37209999561309814},{"id":"https://openalex.org/C176553487","wikidata":"https://www.wikidata.org/wiki/Q7855819","display_name":"Turnaround time","level":2,"score":0.36469998955726624},{"id":"https://openalex.org/C120314980","wikidata":"https://www.wikidata.org/wiki/Q180634","display_name":"Distributed computing","level":1,"score":0.313400000333786},{"id":"https://openalex.org/C32022120","wikidata":"https://www.wikidata.org/wiki/Q797225","display_name":"Interference (communication)","level":3,"score":0.31220000982284546},{"id":"https://openalex.org/C79403827","wikidata":"https://www.wikidata.org/wiki/Q3988","display_name":"Real-time computing","level":1,"score":0.30329999327659607},{"id":"https://openalex.org/C43711488","wikidata":"https://www.wikidata.org/wiki/Q7534783","display_name":"Skew","level":2,"score":0.29159998893737793},{"id":"https://openalex.org/C175291020","wikidata":"https://www.wikidata.org/wiki/Q1156822","display_name":"Offset (computer science)","level":2,"score":0.27970001101493835},{"id":"https://openalex.org/C173512123","wikidata":"https://www.wikidata.org/wiki/Q5978010","display_name":"Statistical time division multiplexing","level":3,"score":0.26249998807907104},{"id":"https://openalex.org/C26713055","wikidata":"https://www.wikidata.org/wiki/Q245962","display_name":"Implementation","level":2,"score":0.26100000739097595},{"id":"https://openalex.org/C158379750","wikidata":"https://www.wikidata.org/wiki/Q214111","display_name":"Network packet","level":2,"score":0.2556999921798706}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/dac63849.2025.11132617","is_oa":false,"landing_page_url":"https://doi.org/10.1109/dac63849.2025.11132617","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 62nd ACM/IEEE Design Automation Conference (DAC)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":5,"referenced_works":["https://openalex.org/W2928897890","https://openalex.org/W4387321091","https://openalex.org/W4394998727","https://openalex.org/W4401211704","https://openalex.org/W4404401017"],"related_works":[],"abstract_inverted_index":{"To":[0],"guarantee":[1],"service":[2],"quality":[3],"in":[4,63,78,92,211,227],"transformer":[5],"based":[6],"large":[7],"language":[8],"model":[9],"(LLM)":[10],"serving,":[11,81,126],"it":[12],"is":[13],"essential":[14],"to":[15,115,153,180,192],"meet":[16],"the":[17,22,30,44,51,117,143,147,150,193,215,232],"latency":[18],"constraints":[19],"of":[20,120,163],"both":[21,64,121,170],"prefill":[23,40,52,82],"phase":[24,32],"(measured":[25,33],"by":[26,34,203],"Time-to-First-Token,":[27],"TTFT)":[28],"and":[29,41,53,66,111,123,130,146,157,173],"decode":[31,42,54,88],"Time-per-Output-Token,":[35],"TPOT).":[36],"Non-disaggregated":[37],"serving":[38,49,100,152,176,196,218],"places":[39,50],"on":[43,55],"same":[45,233],"worker,":[46],"while":[47,230],"disaggregated":[48,79,124,174,194],"isolated":[56],"workers.":[57],"However,":[58],"no":[59],"single":[60],"architecture":[61],"excels":[62],"TTFT":[65,156,201],"TPOT":[67,158,229],"metrics.":[68],"After":[69],"conducting":[70],"a":[71,186,222],"root":[72],"cause":[73],"analysis,":[74],"we":[75,127],"concluded":[76],"that":[77,141,167],"LLM":[80,99,125,151,175,195,217],"execution":[83,89],"has":[84],"minimal":[85],"interference":[86,108],"with":[87,206],"but":[90,105],"result":[91],"high":[93,155],"queuing":[94,103,144],"times.":[95],"In":[96,113],"contrast,":[97],"non-disaggregated":[98,122,172,216],"effectively":[101],"reduces":[102],"times":[104],"introduces":[106,134],"significant":[107],"between":[109],"prefills":[110],"decodes.":[112],"order":[114],"leverage":[116],"best":[118],"aspects":[119],"have":[128],"designed":[129],"implemented":[131],"Tropical.":[132],"Tropical":[133,168,198,220],"an":[135,208],"sevice-level":[136],"objectives":[137],"(SLO)-aware":[138],"multiplexing":[139],"strategy":[140],"balances":[142],"time":[145],"interference,":[148],"enabling":[149],"achieve":[154],"SLOs":[159],"simultaneously.":[160],"Our":[161],"evaluation":[162],"real-world":[164],"datasets":[165],"reveals":[166],"outperforms":[169],"state-of-the-art":[171],"systems,":[177,219],"achieving":[178],"up":[179],"$2.09":[181],"\\times$":[182,205,224],"more":[183],"requests":[184],"within":[185],"90%":[187],"SLO":[188],"attainment.":[189],"Specially,":[190],"compared":[191],"system,":[197],"improves":[199],"P90":[200,212,228,234],"performance":[202,225],"$9":[204],"only":[207],"15%":[209],"reduction":[210],"TPOT.":[213],"Against":[214],"delivers":[221],"$2.8":[223],"improvement":[226],"maintaining":[231],"TTFT.":[235]},"counts_by_year":[{"year":2026,"cited_by_count":1}],"updated_date":"2026-03-12T08:34:05.389933","created_date":"2025-10-10T00:00:00"}
