{"id":"https://openalex.org/W7123863483","doi":"https://doi.org/10.1145/3772052.3772239","title":"AdaSpec: Adaptive Speculative Decoding for Fast, SLO-Aware Large Language Model Serving","display_name":"AdaSpec: Adaptive Speculative Decoding for Fast, SLO-Aware Large Language Model Serving","publication_year":2025,"publication_date":"2025-11-19","ids":{"openalex":"https://openalex.org/W7123863483","doi":"https://doi.org/10.1145/3772052.3772239"},"language":null,"primary_location":{"id":"doi:10.1145/3772052.3772239","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3772052.3772239","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2025 ACM Symposium on Cloud Computing","raw_type":"proceedings-article"},"type":"article","indexed_in":["arxiv","crossref"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/2503.05096","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":null,"display_name":"Kaiyu Huang","orcid":"https://orcid.org/0009-0008-2340-4143"},"institutions":[{"id":"https://openalex.org/I4210116924","display_name":"Chinese University of Hong Kong, Shenzhen","ror":"https://ror.org/02d5ks197","country_code":"CN","type":"education","lineage":["https://openalex.org/I177725633","https://openalex.org/I180726961","https://openalex.org/I4210116924"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Kaiyu Huang","raw_affiliation_strings":["Tongji University, Shanghai, China and Shenzhen Research Institute of Big Data, The Chinese University of Hong Kong, Shenzhen, Shenzhen, Guangdong, China"],"raw_orcid":"https://orcid.org/0009-0008-2340-4143","affiliations":[{"raw_affiliation_string":"Tongji University, Shanghai, China and Shenzhen Research Institute of Big Data, The Chinese University of Hong Kong, Shenzhen, Shenzhen, Guangdong, China","institution_ids":["https://openalex.org/I4210116924"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Hao Wu","orcid":"https://orcid.org/0000-0003-2570-4648"},"institutions":[{"id":"https://openalex.org/I47720641","display_name":"Huazhong University of Science and Technology","ror":"https://ror.org/00p991c53","country_code":"CN","type":"education","lineage":["https://openalex.org/I47720641"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Hao Wu","raw_affiliation_strings":["Huazhong University of Science and Technology, Wuhan, Hubei, China"],"raw_orcid":"https://orcid.org/0000-0003-2570-4648","affiliations":[{"raw_affiliation_string":"Huazhong University of Science and Technology, Wuhan, Hubei, China","institution_ids":["https://openalex.org/I47720641"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5122956593","display_name":"Zhubo Shi","orcid":null},"institutions":[{"id":"https://openalex.org/I116953780","display_name":"Tongji University","ror":"https://ror.org/03rc6as71","country_code":"CN","type":"education","lineage":["https://openalex.org/I116953780"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Zhubo Shi","raw_affiliation_strings":["Tongji University, Shanghai, China"],"raw_orcid":"https://orcid.org/0009-0007-2042-2947","affiliations":[{"raw_affiliation_string":"Tongji University, Shanghai, China","institution_ids":["https://openalex.org/I116953780"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5122987435","display_name":"Han Zou","orcid":null},"institutions":[{"id":"https://openalex.org/I116953780","display_name":"Tongji University","ror":"https://ror.org/03rc6as71","country_code":"CN","type":"education","lineage":["https://openalex.org/I116953780"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Han Zou","raw_affiliation_strings":["Tongji University, Shanghai, China"],"raw_orcid":"https://orcid.org/0009-0002-7503-542X","affiliations":[{"raw_affiliation_string":"Tongji University, Shanghai, China","institution_ids":["https://openalex.org/I116953780"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5051893788","display_name":"Minchen Yu","orcid":"https://orcid.org/0000-0002-6797-9028"},"institutions":[{"id":"https://openalex.org/I4210116924","display_name":"Chinese University of Hong Kong, Shenzhen","ror":"https://ror.org/02d5ks197","country_code":"CN","type":"education","lineage":["https://openalex.org/I177725633","https://openalex.org/I180726961","https://openalex.org/I4210116924"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Minchen Yu","raw_affiliation_strings":["School of Data Science, The Chinese University of Hong Kong, Shenzhen, Shenzhen, Guangdong, China and Shenzhen Research Institute of Big Data, The Chinese University of Hong Kong, Shenzhen, Shenzhen, Guangdong, China"],"raw_orcid":"https://orcid.org/0000-0002-6797-9028","affiliations":[{"raw_affiliation_string":"School of Data Science, The Chinese University of Hong Kong, Shenzhen, Shenzhen, Guangdong, China and Shenzhen Research Institute of Big Data, The Chinese University of Hong Kong, Shenzhen, Shenzhen, Guangdong, China","institution_ids":["https://openalex.org/I4210116924"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5059252324","display_name":"Qingjiang Shi","orcid":"https://orcid.org/0000-0003-0507-9080"},"institutions":[{"id":"https://openalex.org/I4210116924","display_name":"Chinese University of Hong Kong, Shenzhen","ror":"https://ror.org/02d5ks197","country_code":"CN","type":"education","lineage":["https://openalex.org/I177725633","https://openalex.org/I180726961","https://openalex.org/I4210116924"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Qingjiang Shi","raw_affiliation_strings":["Tongji University, Shanghai, China and Shenzhen Research Institute of Big Data, The Chinese University of Hong Kong, Shenzhen, Shenzhen, Guangdong, China"],"raw_orcid":"https://orcid.org/0000-0003-0507-9080","affiliations":[{"raw_affiliation_string":"Tongji University, Shanghai, China and Shenzhen Research Institute of Big Data, The Chinese University of Hong Kong, Shenzhen, Shenzhen, Guangdong, China","institution_ids":["https://openalex.org/I4210116924"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":6,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.75429362,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"361","last_page":"374"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T14347","display_name":"Big Data and Digital Economy","score":0.22259999811649323,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T14347","display_name":"Big Data and Digital Economy","score":0.22259999811649323,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11948","display_name":"Machine Learning in Materials Science","score":0.09769999980926514,"subfield":{"id":"https://openalex.org/subfields/2505","display_name":"Materials Chemistry"},"field":{"id":"https://openalex.org/fields/25","display_name":"Materials Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12127","display_name":"Software System Performance and Reliability","score":0.09049999713897705,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/exploit","display_name":"Exploit","score":0.6704000234603882},{"id":"https://openalex.org/keywords/inference","display_name":"Inference","score":0.6488999724388123},{"id":"https://openalex.org/keywords/speedup","display_name":"Speedup","score":0.5533999800682068},{"id":"https://openalex.org/keywords/decoding-methods","display_name":"Decoding methods","score":0.5529000163078308},{"id":"https://openalex.org/keywords/latency","display_name":"Latency (audio)","score":0.5095000267028809},{"id":"https://openalex.org/keywords/language-model","display_name":"Language model","score":0.3885999917984009},{"id":"https://openalex.org/keywords/service","display_name":"Service (business)","score":0.382099986076355}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8723000288009644},{"id":"https://openalex.org/C165696696","wikidata":"https://www.wikidata.org/wiki/Q11287","display_name":"Exploit","level":2,"score":0.6704000234603882},{"id":"https://openalex.org/C2776214188","wikidata":"https://www.wikidata.org/wiki/Q408386","display_name":"Inference","level":2,"score":0.6488999724388123},{"id":"https://openalex.org/C68339613","wikidata":"https://www.wikidata.org/wiki/Q1549489","display_name":"Speedup","level":2,"score":0.5533999800682068},{"id":"https://openalex.org/C57273362","wikidata":"https://www.wikidata.org/wiki/Q576722","display_name":"Decoding methods","level":2,"score":0.5529000163078308},{"id":"https://openalex.org/C82876162","wikidata":"https://www.wikidata.org/wiki/Q17096504","display_name":"Latency (audio)","level":2,"score":0.5095000267028809},{"id":"https://openalex.org/C120314980","wikidata":"https://www.wikidata.org/wiki/Q180634","display_name":"Distributed computing","level":1,"score":0.4016000032424927},{"id":"https://openalex.org/C137293760","wikidata":"https://www.wikidata.org/wiki/Q3621696","display_name":"Language model","level":2,"score":0.3885999917984009},{"id":"https://openalex.org/C2780378061","wikidata":"https://www.wikidata.org/wiki/Q25351891","display_name":"Service (business)","level":2,"score":0.382099986076355},{"id":"https://openalex.org/C2776760102","wikidata":"https://www.wikidata.org/wiki/Q5139990","display_name":"Code (set theory)","level":3,"score":0.37549999356269836},{"id":"https://openalex.org/C2778915421","wikidata":"https://www.wikidata.org/wiki/Q3643177","display_name":"Performance improvement","level":2,"score":0.3244999945163727},{"id":"https://openalex.org/C141331961","wikidata":"https://www.wikidata.org/wiki/Q2164465","display_name":"Speculative execution","level":2,"score":0.29499998688697815},{"id":"https://openalex.org/C2779304628","wikidata":"https://www.wikidata.org/wiki/Q3503480","display_name":"Face (sociological concept)","level":2,"score":0.2759000062942505},{"id":"https://openalex.org/C46637626","wikidata":"https://www.wikidata.org/wiki/Q6693015","display_name":"Low latency (capital markets)","level":2,"score":0.26759999990463257},{"id":"https://openalex.org/C43126263","wikidata":"https://www.wikidata.org/wiki/Q128751","display_name":"Source code","level":2,"score":0.26660001277923584},{"id":"https://openalex.org/C147297375","wikidata":"https://www.wikidata.org/wiki/Q6674930","display_name":"Look-ahead","level":2,"score":0.26409998536109924},{"id":"https://openalex.org/C79403827","wikidata":"https://www.wikidata.org/wiki/Q3988","display_name":"Real-time computing","level":1,"score":0.25859999656677246}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1145/3772052.3772239","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3772052.3772239","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2025 ACM Symposium on Cloud Computing","raw_type":"proceedings-article"},{"id":"pmh:oai:arXiv.org:2503.05096","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2503.05096","pdf_url":"https://arxiv.org/pdf/2503.05096","source":{"id":"https://openalex.org/S4393918464","display_name":"ArXiv.org","issn_l":"2331-8422","issn":["2331-8422"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:2503.05096","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2503.05096","pdf_url":"https://arxiv.org/pdf/2503.05096","source":{"id":"https://openalex.org/S4393918464","display_name":"ArXiv.org","issn_l":"2331-8422","issn":["2331-8422"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":12,"referenced_works":["https://openalex.org/W2798291715","https://openalex.org/W2912924812","https://openalex.org/W2963929190","https://openalex.org/W3099700870","https://openalex.org/W3209166877","https://openalex.org/W4387321091","https://openalex.org/W4395112660","https://openalex.org/W4401042997","https://openalex.org/W4402672007","https://openalex.org/W4402683901","https://openalex.org/W4404386015","https://openalex.org/W4404792854"],"related_works":[],"abstract_inverted_index":{"Cloud-based":[0],"Large":[1],"Language":[2],"Model":[3],"(LLM)":[4],"services":[5],"often":[6,51],"face":[7],"challenges":[8],"in":[9,63],"achieving":[10],"low":[11],"inference":[12,78,153],"latency":[13],"and":[14,32,58,66,90,100,115,139],"meeting":[15],"Service":[16],"Level":[17],"Objectives":[18],"(SLOs)":[19],"under":[20],"dynamic":[21,59],"request":[22,88],"patterns.":[23],"Speculative":[24],"decoding,":[25],"which":[26],"exploits":[27],"lightweight":[28],"models":[29],"for":[30,34],"drafting":[31,114],"LLMs":[33],"verification,":[35],"has":[36],"emerged":[37],"as":[38],"a":[39,95],"compelling":[40],"technique":[41],"to":[42,53,55,86,98,118,146,150],"accelerate":[43],"LLM":[44,77,130],"inference.":[45],"However,":[46],"existing":[47],"speculative":[48,83,105,152],"decoding":[49],"solutions":[50],"fail":[52],"adapt":[54],"fluctuating":[56],"workloads":[57],"system":[60,79,91],"environments,":[61],"resulting":[62],"impaired":[64],"performance":[65,120,142],"SLO":[67,124],"violations.":[68],"In":[69],"this":[70],"paper,":[71],"we":[72],"introduce":[73],"AdaSpec,":[74],"an":[75],"efficient":[76],"that":[80,134],"dynamically":[81],"adjusts":[82],"strategies":[84,106],"according":[85],"real-time":[87],"loads":[89],"configurations.":[92],"AdaSpec":[93,135],"proposes":[94],"theoretical":[96],"model":[97],"analyze":[99],"predict":[101],"the":[102],"efficiency":[103],"of":[104],"across":[107],"diverse":[108],"scenarios.":[109],"Additionally,":[110],"it":[111],"implements":[112],"intelligent":[113],"verification":[116],"algorithms":[117],"maximize":[119],"while":[121],"ensuring":[122],"high":[123],"attainment.":[125],"Experimental":[126],"results":[127],"on":[128],"real-world":[129],"service":[131],"traces":[132],"demonstrate":[133],"consistently":[136],"meets":[137],"SLOs":[138],"achieves":[140],"substantial":[141],"improvements,":[143],"delivering":[144],"up":[145],"66%":[147],"speedup":[148],"compared":[149],"state-of-the-art":[151],"systems.":[154],"The":[155],"source":[156],"code":[157],"is":[158],"publicly":[159],"available":[160],"at":[161],"https://github.com/cerebellumking/AdaSpec":[162]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2026-01-14T00:00:00"}
