{"id":"https://openalex.org/W7119138573","doi":"https://doi.org/10.1109/lcomm.2026.3651580","title":"Quantize-Sample-and-Verify: LLM Acceleration via Adaptive Edge-Cloud Speculative Decoding","display_name":"Quantize-Sample-and-Verify: LLM Acceleration via Adaptive Edge-Cloud Speculative Decoding","publication_year":2026,"publication_date":"2026-01-01","ids":{"openalex":"https://openalex.org/W7119138573","doi":"https://doi.org/10.1109/lcomm.2026.3651580"},"language":"en","primary_location":{"id":"doi:10.1109/lcomm.2026.3651580","is_oa":false,"landing_page_url":"https://doi.org/10.1109/lcomm.2026.3651580","pdf_url":null,"source":{"id":"https://openalex.org/S147316732","display_name":"IEEE Communications Letters","issn_l":"1089-7798","issn":["1089-7798","1558-2558","2373-7891"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310316002","host_organization_name":"IEEE Communications Society","host_organization_lineage":["https://openalex.org/P4310316002","https://openalex.org/P4310319808"],"host_organization_lineage_names":["IEEE Communications Society","Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Communications Letters","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5122009002","display_name":"Guangyi Zhang","orcid":null},"institutions":[{"id":"https://openalex.org/I4210123185","display_name":"Zhejiang Lab","ror":"https://ror.org/02m2h7991","country_code":"CN","type":"facility","lineage":["https://openalex.org/I4210123185"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Guangyi Zhang","raw_affiliation_strings":["College of Information Science and Electronic Engineering and Zhejiang Provincial Key Laboratory of Multi-Modal Communication Networks and Intelligent Information Processing, Zhejiang University, Hangzhou, China"],"affiliations":[{"raw_affiliation_string":"College of Information Science and Electronic Engineering and Zhejiang Provincial Key Laboratory of Multi-Modal Communication Networks and Intelligent Information Processing, Zhejiang University, Hangzhou, China","institution_ids":["https://openalex.org/I4210123185"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5122018503","display_name":"Yunlong Cai","orcid":null},"institutions":[{"id":"https://openalex.org/I4210123185","display_name":"Zhejiang Lab","ror":"https://ror.org/02m2h7991","country_code":"CN","type":"facility","lineage":["https://openalex.org/I4210123185"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yunlong Cai","raw_affiliation_strings":["College of Information Science and Electronic Engineering and Zhejiang Provincial Key Laboratory of Multi-Modal Communication Networks and Intelligent Information Processing, Zhejiang University, Hangzhou, China"],"affiliations":[{"raw_affiliation_string":"College of Information Science and Electronic Engineering and Zhejiang Provincial Key Laboratory of Multi-Modal Communication Networks and Intelligent Information Processing, Zhejiang University, Hangzhou, China","institution_ids":["https://openalex.org/I4210123185"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5121280747","display_name":"Guanding Yu","orcid":null},"institutions":[{"id":"https://openalex.org/I4210123185","display_name":"Zhejiang Lab","ror":"https://ror.org/02m2h7991","country_code":"CN","type":"facility","lineage":["https://openalex.org/I4210123185"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Guanding Yu","raw_affiliation_strings":["College of Information Science and Electronic Engineering and Zhejiang Provincial Key Laboratory of Multi-Modal Communication Networks and Intelligent Information Processing, Zhejiang University, Hangzhou, China"],"affiliations":[{"raw_affiliation_string":"College of Information Science and Electronic Engineering and Zhejiang Provincial Key Laboratory of Multi-Modal Communication Networks and Intelligent Information Processing, Zhejiang University, Hangzhou, China","institution_ids":["https://openalex.org/I4210123185"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5071289803","display_name":"Petar Popovski","orcid":"https://orcid.org/0000-0001-6195-4797"},"institutions":[{"id":"https://openalex.org/I891191580","display_name":"Aalborg University","ror":"https://ror.org/04m5j1k67","country_code":"DK","type":"education","lineage":["https://openalex.org/I891191580"]}],"countries":["DK"],"is_corresponding":false,"raw_author_name":"Petar Popovski","raw_affiliation_strings":["Connectivity Section, Aalborg University, Aalborg, Denmark"],"affiliations":[{"raw_affiliation_string":"Connectivity Section, Aalborg University, Aalborg, Denmark","institution_ids":["https://openalex.org/I891191580"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5017736224","display_name":"Osvaldo Simeone","orcid":"https://orcid.org/0000-0001-9898-3209"},"institutions":[{"id":"https://openalex.org/I4210152629","display_name":"Eastern University","ror":"https://ror.org/05e2ncr14","country_code":"BD","type":"education","lineage":["https://openalex.org/I4210152629"]}],"countries":["BD"],"is_corresponding":false,"raw_author_name":"Osvaldo Simeone","raw_affiliation_strings":["Intelligent Networked Systems Institute (INSI), Northeastern University London, London, U.K"],"affiliations":[{"raw_affiliation_string":"Intelligent Networked Systems Institute (INSI), Northeastern University London, London, U.K","institution_ids":["https://openalex.org/I4210152629"]}]}],"institutions":[],"countries_distinct_count":3,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5122009002"],"corresponding_institution_ids":["https://openalex.org/I4210123185"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.0437891,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":"30","issue":null,"first_page":"852","last_page":"856"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10273","display_name":"IoT and Edge/Fog Computing","score":0.14190000295639038,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10273","display_name":"IoT and Edge/Fog Computing","score":0.14190000295639038,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.08250000327825546,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10148","display_name":"Advanced MIMO Systems Optimization","score":0.07029999792575836,"subfield":{"id":"https://openalex.org/subfields/2208","display_name":"Electrical and Electronic Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/decoding-methods","display_name":"Decoding methods","score":0.7763000130653381},{"id":"https://openalex.org/keywords/bottleneck","display_name":"Bottleneck","score":0.6481999754905701},{"id":"https://openalex.org/keywords/quantization","display_name":"Quantization (signal processing)","score":0.6309999823570251},{"id":"https://openalex.org/keywords/throughput","display_name":"Throughput","score":0.5232999920845032},{"id":"https://openalex.org/keywords/security-token","display_name":"Security token","score":0.508400022983551},{"id":"https://openalex.org/keywords/bandwidth","display_name":"Bandwidth (computing)","score":0.4837999939918518},{"id":"https://openalex.org/keywords/edge-device","display_name":"Edge device","score":0.44119998812675476},{"id":"https://openalex.org/keywords/telecommunications-link","display_name":"Telecommunications link","score":0.3846000134944916}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8636999726295471},{"id":"https://openalex.org/C57273362","wikidata":"https://www.wikidata.org/wiki/Q576722","display_name":"Decoding methods","level":2,"score":0.7763000130653381},{"id":"https://openalex.org/C2780513914","wikidata":"https://www.wikidata.org/wiki/Q18210350","display_name":"Bottleneck","level":2,"score":0.6481999754905701},{"id":"https://openalex.org/C28855332","wikidata":"https://www.wikidata.org/wiki/Q198099","display_name":"Quantization (signal processing)","level":2,"score":0.6309999823570251},{"id":"https://openalex.org/C157764524","wikidata":"https://www.wikidata.org/wiki/Q1383412","display_name":"Throughput","level":3,"score":0.5232999920845032},{"id":"https://openalex.org/C48145219","wikidata":"https://www.wikidata.org/wiki/Q1335365","display_name":"Security token","level":2,"score":0.508400022983551},{"id":"https://openalex.org/C2776257435","wikidata":"https://www.wikidata.org/wiki/Q1576430","display_name":"Bandwidth (computing)","level":2,"score":0.4837999939918518},{"id":"https://openalex.org/C138236772","wikidata":"https://www.wikidata.org/wiki/Q25098575","display_name":"Edge device","level":3,"score":0.44119998812675476},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.39570000767707825},{"id":"https://openalex.org/C138660444","wikidata":"https://www.wikidata.org/wiki/Q5607897","display_name":"Telecommunications link","level":2,"score":0.3846000134944916},{"id":"https://openalex.org/C127162648","wikidata":"https://www.wikidata.org/wiki/Q16858953","display_name":"Channel (broadcasting)","level":2,"score":0.3815000057220459},{"id":"https://openalex.org/C555944384","wikidata":"https://www.wikidata.org/wiki/Q249","display_name":"Wireless","level":2,"score":0.36809998750686646},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.367000013589859},{"id":"https://openalex.org/C162307627","wikidata":"https://www.wikidata.org/wiki/Q204833","display_name":"Enhanced Data Rates for GSM Evolution","level":2,"score":0.35030001401901245},{"id":"https://openalex.org/C101765175","wikidata":"https://www.wikidata.org/wiki/Q577764","display_name":"Communications system","level":2,"score":0.3425999879837036},{"id":"https://openalex.org/C193969084","wikidata":"https://www.wikidata.org/wiki/Q7452500","display_name":"Sequential decoding","level":4,"score":0.31690001487731934},{"id":"https://openalex.org/C79403827","wikidata":"https://www.wikidata.org/wiki/Q3988","display_name":"Real-time computing","level":1,"score":0.30480000376701355},{"id":"https://openalex.org/C31258907","wikidata":"https://www.wikidata.org/wiki/Q1301371","display_name":"Computer network","level":1,"score":0.28349998593330383},{"id":"https://openalex.org/C113775141","wikidata":"https://www.wikidata.org/wiki/Q428691","display_name":"Computer engineering","level":1,"score":0.2696000039577484},{"id":"https://openalex.org/C157899210","wikidata":"https://www.wikidata.org/wiki/Q1395022","display_name":"Convolutional code","level":3,"score":0.2685999870300293},{"id":"https://openalex.org/C105339364","wikidata":"https://www.wikidata.org/wiki/Q2297740","display_name":"Software deployment","level":2,"score":0.26759999990463257},{"id":"https://openalex.org/C80444323","wikidata":"https://www.wikidata.org/wiki/Q2878974","display_name":"Theoretical computer science","level":1,"score":0.26190000772476196},{"id":"https://openalex.org/C120314980","wikidata":"https://www.wikidata.org/wiki/Q180634","display_name":"Distributed computing","level":1,"score":0.2581999897956848},{"id":"https://openalex.org/C8521452","wikidata":"https://www.wikidata.org/wiki/Q203790","display_name":"Connectionism","level":3,"score":0.2524000108242035}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1109/lcomm.2026.3651580","is_oa":false,"landing_page_url":"https://doi.org/10.1109/lcomm.2026.3651580","pdf_url":null,"source":{"id":"https://openalex.org/S147316732","display_name":"IEEE Communications Letters","issn_l":"1089-7798","issn":["1089-7798","1558-2558","2373-7891"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310316002","host_organization_name":"IEEE Communications Society","host_organization_lineage":["https://openalex.org/P4310316002","https://openalex.org/P4310319808"],"host_organization_lineage_names":["IEEE Communications Society","Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Communications Letters","raw_type":"journal-article"},{"id":"pmh:oai:pure.atira.dk:publications/7c77fdeb-73ce-4865-838d-b3f60bf8d59b","is_oa":false,"landing_page_url":"https://vbn.aau.dk/da/publications/7c77fdeb-73ce-4865-838d-b3f60bf8d59b","pdf_url":null,"source":{"id":"https://openalex.org/S4306401731","display_name":"VBN Forskningsportal (Aalborg Universitet)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I891191580","host_organization_name":"Aalborg University","host_organization_lineage":["https://openalex.org/I891191580"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Zhang, G, Cai, Y, Yu, G, Popovski, P & Simeone, O 2026, 'Quantize-Sample-and-Verify : LLM Acceleration via Adaptive Edge-Cloud Speculative Decoding', IEEE Communications Letters, vol. 30, pp. 852-856. https://doi.org/10.1109/LCOMM.2026.3651580","raw_type":"info:eu-repo/semantics/publishedVersion"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":13,"referenced_works":["https://openalex.org/W2016190290","https://openalex.org/W2746553466","https://openalex.org/W2963929190","https://openalex.org/W3034322405","https://openalex.org/W3155774885","https://openalex.org/W4323666907","https://openalex.org/W4399534541","https://openalex.org/W4403678765","https://openalex.org/W4405078778","https://openalex.org/W4410771411","https://openalex.org/W4413967631","https://openalex.org/W4415797132","https://openalex.org/W4415798202"],"related_works":[],"abstract_inverted_index":{"In":[0,52],"edge-cloud":[1,97,148],"speculative":[2],"decoding":[3,144],"(SD),":[4],"edge":[5,39],"devices":[6],"equipped":[7],"with":[8],"small":[9],"language":[10,21],"models":[11,22],"(SLMs)":[12],"generate":[13],"draft":[14,121],"tokens":[15,76],"that":[16,62,73,82,99,113,137],"are":[17],"verified":[18,75],"by":[19,88,117],"large":[20],"(LLMs)":[23],"in":[24,30,126,146],"the":[25,34,46,65,69,74,78,89,120,138],"cloud.":[26],"A":[27],"key":[28],"bottleneck":[29],"such":[31],"systems":[32],"is":[33],"limited":[35],"communication":[36,103],"bandwidth":[37],"between":[38],"and":[40,123,132],"cloud,":[41],"which":[42],"necessitates":[43],"quantization":[44,124],"of":[45,68,80],"information":[47],"transmitted":[48],"about":[49],"generated":[50,86],"tokens.":[51],"this":[53,106],"work,":[54],"we":[55,108],"introduce":[56],"a":[57,93],"novel":[58],"quantize-sample":[59],"(Q-S)":[60],"strategy":[61],"provably":[63],"preserves":[64],"output":[66],"distribution":[67,79],"cloud-based":[70],"model,":[71,107],"ensuring":[72],"match":[77],"those":[81],"would":[83],"have":[84],"been":[85],"directly":[87],"LLM.":[90],"We":[91],"develop":[92],"throughput":[94,116],"model":[95],"for":[96,102],"SD":[98],"explicitly":[100],"accounts":[101],"latency.":[104],"Leveraging":[105],"propose":[109],"an":[110],"adaptive":[111],"mechanism":[112],"optimizes":[114],"token":[115],"dynamically":[118],"adjusting":[119],"length":[122],"precision":[125],"response":[127],"to":[128],"both":[129],"semantic":[130],"uncertainty":[131],"channel":[133],"conditions.":[134],"Simulations":[135],"demonstrate":[136],"proposed":[139],"Q-S":[140],"approach":[141],"significantly":[142],"improves":[143],"efficiency":[145],"realistic":[147],"deployment":[149],"scenarios.":[150]},"counts_by_year":[],"updated_date":"2026-03-27T05:58:40.876381","created_date":"2026-01-08T00:00:00"}
