{"id":"https://openalex.org/W7133484723","doi":"https://doi.org/10.1109/hpca68181.2026.11408598","title":"Adaptive Draft Sequence Length: Enhancing Speculative Decoding Throughput on PIM-Enabled Systems","display_name":"Adaptive Draft Sequence Length: Enhancing Speculative Decoding Throughput on PIM-Enabled Systems","publication_year":2026,"publication_date":"2026-01-31","ids":{"openalex":"https://openalex.org/W7133484723","doi":"https://doi.org/10.1109/hpca68181.2026.11408598"},"language":null,"primary_location":{"id":"doi:10.1109/hpca68181.2026.11408598","is_oa":false,"landing_page_url":"https://doi.org/10.1109/hpca68181.2026.11408598","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2026 IEEE International Symposium on High Performance Computer Architecture (HPCA)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5115602997","display_name":"Runze Wang","orcid":"https://orcid.org/0009-0009-8847-8167"},"institutions":[{"id":"https://openalex.org/I47720641","display_name":"Huazhong University of Science and Technology","ror":"https://ror.org/00p991c53","country_code":"CN","type":"education","lineage":["https://openalex.org/I47720641"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Runze Wang","raw_affiliation_strings":["School of Computer Science and Technology, Huazhong University of Science and Technology,National Engineering Research Center for Big Data Technology and System/Services Computing Technology and System Lab/Cluster and Grid Computing Lab,Wuhan,China,430074"],"affiliations":[{"raw_affiliation_string":"School of Computer Science and Technology, Huazhong University of Science and Technology,National Engineering Research Center for Big Data Technology and System/Services Computing Technology and System Lab/Cluster and Grid Computing Lab,Wuhan,China,430074","institution_ids":["https://openalex.org/I47720641"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Qinggang Wang","orcid":null},"institutions":[{"id":"https://openalex.org/I47720641","display_name":"Huazhong University of Science and Technology","ror":"https://ror.org/00p991c53","country_code":"CN","type":"education","lineage":["https://openalex.org/I47720641"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Qinggang Wang","raw_affiliation_strings":["School of Computer Science and Technology, Huazhong University of Science and Technology,National Engineering Research Center for Big Data Technology and System/Services Computing Technology and System Lab/Cluster and Grid Computing Lab,Wuhan,China,430074"],"affiliations":[{"raw_affiliation_string":"School of Computer Science and Technology, Huazhong University of Science and Technology,National Engineering Research Center for Big Data Technology and System/Services Computing Technology and System Lab/Cluster and Grid Computing Lab,Wuhan,China,430074","institution_ids":["https://openalex.org/I47720641"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5128070683","display_name":"Haifeng Liu","orcid":null},"institutions":[{"id":"https://openalex.org/I31746571","display_name":"UNSW Sydney","ror":"https://ror.org/03r8z3t63","country_code":"AU","type":"education","lineage":["https://openalex.org/I31746571"]}],"countries":["AU"],"is_corresponding":false,"raw_author_name":"Haifeng Liu","raw_affiliation_strings":["School of Computer Science and Engineering, University of New South Wales,Sydney,NSW,Australia,2052"],"affiliations":[{"raw_affiliation_string":"School of Computer Science and Engineering, University of New South Wales,Sydney,NSW,Australia,2052","institution_ids":["https://openalex.org/I31746571"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5128066426","display_name":"Long Zheng","orcid":null},"institutions":[{"id":"https://openalex.org/I47720641","display_name":"Huazhong University of Science and Technology","ror":"https://ror.org/00p991c53","country_code":"CN","type":"education","lineage":["https://openalex.org/I47720641"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Long Zheng","raw_affiliation_strings":["School of Computer Science and Technology, Huazhong University of Science and Technology,National Engineering Research Center for Big Data Technology and System/Services Computing Technology and System Lab/Cluster and Grid Computing Lab,Wuhan,China,430074"],"affiliations":[{"raw_affiliation_string":"School of Computer Science and Technology, Huazhong University of Science and Technology,National Engineering Research Center for Big Data Technology and System/Services Computing Technology and System Lab/Cluster and Grid Computing Lab,Wuhan,China,430074","institution_ids":["https://openalex.org/I47720641"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5128087606","display_name":"Xiaofei Liao","orcid":null},"institutions":[{"id":"https://openalex.org/I47720641","display_name":"Huazhong University of Science and Technology","ror":"https://ror.org/00p991c53","country_code":"CN","type":"education","lineage":["https://openalex.org/I47720641"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xiaofei Liao","raw_affiliation_strings":["School of Computer Science and Technology, Huazhong University of Science and Technology,National Engineering Research Center for Big Data Technology and System/Services Computing Technology and System Lab/Cluster and Grid Computing Lab,Wuhan,China,430074"],"affiliations":[{"raw_affiliation_string":"School of Computer Science and Technology, Huazhong University of Science and Technology,National Engineering Research Center for Big Data Technology and System/Services Computing Technology and System Lab/Cluster and Grid Computing Lab,Wuhan,China,430074","institution_ids":["https://openalex.org/I47720641"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5128096813","display_name":"Hai Jin","orcid":null},"institutions":[{"id":"https://openalex.org/I47720641","display_name":"Huazhong University of Science and Technology","ror":"https://ror.org/00p991c53","country_code":"CN","type":"education","lineage":["https://openalex.org/I47720641"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Hai Jin","raw_affiliation_strings":["School of Computer Science and Technology, Huazhong University of Science and Technology,National Engineering Research Center for Big Data Technology and System/Services Computing Technology and System Lab/Cluster and Grid Computing Lab,Wuhan,China,430074"],"affiliations":[{"raw_affiliation_string":"School of Computer Science and Technology, Huazhong University of Science and Technology,National Engineering Research Center for Big Data Technology and System/Services Computing Technology and System Lab/Cluster and Grid Computing Lab,Wuhan,China,430074","institution_ids":["https://openalex.org/I47720641"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5123022627","display_name":"Jingling Xue","orcid":null},"institutions":[{"id":"https://openalex.org/I31746571","display_name":"UNSW Sydney","ror":"https://ror.org/03r8z3t63","country_code":"AU","type":"education","lineage":["https://openalex.org/I31746571"]}],"countries":["AU"],"is_corresponding":false,"raw_author_name":"Jingling Xue","raw_affiliation_strings":["School of Computer Science and Engineering, University of New South Wales,Sydney,NSW,Australia,2052"],"affiliations":[{"raw_affiliation_string":"School of Computer Science and Engineering, University of New South Wales,Sydney,NSW,Australia,2052","institution_ids":["https://openalex.org/I31746571"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":7,"corresponding_author_ids":["https://openalex.org/A5115602997"],"corresponding_institution_ids":["https://openalex.org/I47720641"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.46574198,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"15"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11269","display_name":"Algorithms and Data Compression","score":0.3084999918937683,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11269","display_name":"Algorithms and Data Compression","score":0.3084999918937683,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12326","display_name":"Network Packet Processing and Optimization","score":0.13539999723434448,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10904","display_name":"Embedded Systems Design Techniques","score":0.03449999913573265,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/throughput","display_name":"Throughput","score":0.5605000257492065},{"id":"https://openalex.org/keywords/sequence","display_name":"Sequence (biology)","score":0.5467000007629395},{"id":"https://openalex.org/keywords/decoding-methods","display_name":"Decoding methods","score":0.5424000024795532},{"id":"https://openalex.org/keywords/key","display_name":"Key (lock)","score":0.31769999861717224},{"id":"https://openalex.org/keywords/encoding","display_name":"Encoding (memory)","score":0.3093999922275543}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.704200029373169},{"id":"https://openalex.org/C157764524","wikidata":"https://www.wikidata.org/wiki/Q1383412","display_name":"Throughput","level":3,"score":0.5605000257492065},{"id":"https://openalex.org/C2778112365","wikidata":"https://www.wikidata.org/wiki/Q3511065","display_name":"Sequence (biology)","level":2,"score":0.5467000007629395},{"id":"https://openalex.org/C57273362","wikidata":"https://www.wikidata.org/wiki/Q576722","display_name":"Decoding methods","level":2,"score":0.5424000024795532},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.392300009727478},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.31769999861717224},{"id":"https://openalex.org/C125411270","wikidata":"https://www.wikidata.org/wiki/Q18653","display_name":"Encoding (memory)","level":2,"score":0.3093999922275543},{"id":"https://openalex.org/C79403827","wikidata":"https://www.wikidata.org/wiki/Q3988","display_name":"Real-time computing","level":1,"score":0.2896000146865845},{"id":"https://openalex.org/C116834253","wikidata":"https://www.wikidata.org/wiki/Q2039217","display_name":"Identification (biology)","level":2,"score":0.28279998898506165},{"id":"https://openalex.org/C80444323","wikidata":"https://www.wikidata.org/wiki/Q2878974","display_name":"Theoretical computer science","level":1,"score":0.2630000114440918},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.262800008058548},{"id":"https://openalex.org/C165064840","wikidata":"https://www.wikidata.org/wiki/Q1321061","display_name":"Matching (statistics)","level":2,"score":0.25699999928474426}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/hpca68181.2026.11408598","is_oa":false,"landing_page_url":"https://doi.org/10.1109/hpca68181.2026.11408598","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2026 IEEE International Symposium on High Performance Computer Architecture (HPCA)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G7794448185","display_name":null,"funder_award_id":"2023YFB4503400","funder_id":"https://openalex.org/F4320335777","funder_display_name":"National Key Research and Development Program of China"},{"id":"https://openalex.org/G8742751441","display_name":null,"funder_award_id":"62402456,62450064,62322205","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"},{"id":"https://openalex.org/F4320335777","display_name":"National Key Research and Development Program of China","ror":null}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":39,"referenced_works":["https://openalex.org/W2982008795","https://openalex.org/W3017024317","https://openalex.org/W3042639142","https://openalex.org/W3129307903","https://openalex.org/W3159727696","https://openalex.org/W3189166979","https://openalex.org/W3189877953","https://openalex.org/W3204998121","https://openalex.org/W3206453033","https://openalex.org/W4214686755","https://openalex.org/W4280496502","https://openalex.org/W4280525524","https://openalex.org/W4292261931","https://openalex.org/W4308083513","https://openalex.org/W4308083754","https://openalex.org/W4313546932","https://openalex.org/W4321636575","https://openalex.org/W4385245566","https://openalex.org/W4387321091","https://openalex.org/W4388757726","https://openalex.org/W4392427708","https://openalex.org/W4394998968","https://openalex.org/W4395073431","https://openalex.org/W4395106409","https://openalex.org/W4395112660","https://openalex.org/W4399677840","https://openalex.org/W4401211704","https://openalex.org/W4402671950","https://openalex.org/W4402672007","https://openalex.org/W4404133527","https://openalex.org/W4404955001","https://openalex.org/W4404955792","https://openalex.org/W4407425079","https://openalex.org/W4408105642","https://openalex.org/W4408925909","https://openalex.org/W4411486231","https://openalex.org/W4412945539","https://openalex.org/W4416146691","https://openalex.org/W4416203772"],"related_works":[],"abstract_inverted_index":{"Transformer-based":[0],"large":[1],"language":[2,37,56],"models":[3],"(LLMs)":[4],"exhibit":[5],"remarkable":[6],"generative":[7],"capabilities,":[8],"but":[9],"their":[10],"inference":[11],"throughput":[12],"is":[13],"limited":[14],"by":[15,32,51,136,179],"the":[16,61,66,130,143,168,253,286],"autoregressive":[17],"decoding":[18,28,228],"process,":[19],"which":[20,45],"generates":[21],"only":[22],"one":[23],"token":[24],"per":[25],"iteration.":[26],"Speculative":[27],"mitigates":[29],"this":[30],"bottleneck":[31],"using":[33],"a":[34,52,94,120,208,274],"lightweight":[35],"draft":[36,43,96,104,123,131,153,183,193,216],"model":[38,57],"(DLM)":[39],"to":[40,82,100,175,213,237,252],"generate":[41],"multiple":[42],"tokens,":[44],"are":[46],"then":[47],"verified":[48],"in":[49,111,182],"parallel":[50],"more":[53],"accurate":[54],"target":[55],"(TLM).":[58],"To":[59,201],"accommodate":[60],"differing":[62],"computational":[63],"patterns":[64],"of":[65,103,147,167,266],"DLM":[67,169,232],"and":[68,78,85,114,145,170,185,234,241,278],"TLM,":[69],"prior":[70],"work":[71],"has":[72],"leveraged":[73],"heterogeneous":[74,158,210],"systems":[75,91,159],"combining":[76],"xPUs":[77],"processing-in-memory":[79],"(PIM)":[80],"units":[81],"offload":[83],"compute-":[84],"memory-intensive":[86],"operators,":[87],"respectively.":[88],"However,":[89],"existing":[90,156],"often":[92],"adopt":[93],"fixed":[95],"sequence":[97],"length,":[98],"leading":[99],"excessive":[101],"rejection":[102],"tokens":[105],"during":[106],"verification-especially":[107],"under":[108],"large-batch":[109],"scenarios-resulting":[110],"redundant":[112],"computation":[113],"reduced":[115],"efficiency.":[116],"This":[117],"paper":[118],"proposes":[119],"runtime":[121],"adaptive":[122,152,215],"length":[124,132,194],"adjustment":[125],"technique":[126],"that":[127,230,248,261],"dynamically":[128,249],"tailors":[129],"for":[133],"each":[134],"request":[135],"monitoring":[137],"cumulative":[138],"acceptance":[139],"probabilities,":[140],"thereby":[141],"minimizing":[142],"generation":[144],"verification":[146,236],"invalid":[148],"tokens.":[149],"Yet,":[150],"integrating":[151],"lengths":[154,217],"into":[155],"PIM-enabled":[157,209],"introduces":[160],"two":[161,221],"new":[162],"challenges:":[163],"(1)":[164,224],"sequential":[165],"execution":[166],"TLM":[171,235],"becomes":[172],"inefficient":[173],"due":[174],"synchronization":[176],"bubbles":[177],"caused":[178],"request-wise":[180],"variability":[181,195],"lengths,":[184],"(2)":[186,242],"static":[187],"operator":[188,197,246],"mappings":[189],"become":[190],"suboptimal":[191],"as":[192],"alters":[196],"arithmetic":[198,244],"intensities":[199],"dynamically.":[200],"address":[202],"these":[203],"issues,":[204],"we":[205],"introduce":[206],"SADDLE,":[207],"system":[211],"designed":[212],"exploit":[214],"effectively.":[218],"SADDLE":[219,262],"incorporates":[220],"key":[222],"mechanisms:":[223],"an":[225,243],"asynchronous":[226],"speculative":[227],"pipeline":[229],"decouples":[231],"prediction":[233],"reduce":[238],"idle":[239],"time,":[240],"intensity-aware":[245],"scheduler":[247],"assigns":[250],"operators":[251],"most":[254],"suitable":[255],"hardware":[256],"units.":[257],"Experimental":[258],"results":[259],"show":[260],"achieves":[263],"average":[264],"speedups":[265],"<tex":[267,279],"xmlns:mml=\"http://www.w3.org/1998/Math/MathML\"":[268,280],"xmlns:xlink=\"http://www.w3.org/1999/xlink\">$\\mathbf{2.":[269],"8":[270],"8}":[271],"\\times$</tex>":[272,284],"over":[273,285],"state-of-the-art":[275],"GPU-only":[276],"solution":[277],"xmlns:xlink=\"http://www.w3.org/1999/xlink\">$\\mathbf{1.":[281],"7":[282],"1}":[283],"best-performing":[287],"GPU+PIM":[288],"baseline.":[289]},"counts_by_year":[],"updated_date":"2026-04-21T08:09:41.155169","created_date":"2026-03-05T00:00:00"}
