{"id":"https://openalex.org/W4409348259","doi":"https://doi.org/10.1609/aaai.v39i23.34647","title":"Adaptive Draft-Verification for Efficient Large Language Model Decoding","display_name":"Adaptive Draft-Verification for Efficient Large Language Model Decoding","publication_year":2025,"publication_date":"2025-04-11","ids":{"openalex":"https://openalex.org/W4409348259","doi":"https://doi.org/10.1609/aaai.v39i23.34647"},"language":"en","primary_location":{"id":"doi:10.1609/aaai.v39i23.34647","is_oa":true,"landing_page_url":"https://doi.org/10.1609/aaai.v39i23.34647","pdf_url":"https://ojs.aaai.org/index.php/AAAI/article/download/34647/36802","source":{"id":"https://openalex.org/S4210191458","display_name":"Proceedings of the AAAI Conference on Artificial Intelligence","issn_l":"2159-5399","issn":["2159-5399","2374-3468"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/P4310320058","host_organization_name":"Association for the Advancement of Artificial Intelligence","host_organization_lineage":["https://openalex.org/P4310320058"],"host_organization_lineage_names":["Association for the Advancement of Artificial Intelligence"],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the AAAI Conference on Artificial Intelligence","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"diamond","oa_url":"https://ojs.aaai.org/index.php/AAAI/article/download/34647/36802","any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5113373185","display_name":"Xukun Liu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Xukun Liu","raw_affiliation_strings":["Northwestern University"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Northwestern University","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5087003069","display_name":"Bowen Lei","orcid":"https://orcid.org/0000-0003-2882-9753"},"institutions":[{"id":"https://openalex.org/I91045830","display_name":"Texas A&M University","ror":"https://ror.org/01f5ytq51","country_code":"US","type":"education","lineage":["https://openalex.org/I91045830"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Bowen Lei","raw_affiliation_strings":["Texas A&M University - College Station"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Texas A&M University - College Station","institution_ids":["https://openalex.org/I91045830"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5087584395","display_name":"Ruqi Zhang","orcid":null},"institutions":[{"id":"https://openalex.org/I219193219","display_name":"Purdue University West Lafayette","ror":"https://ror.org/02dqehb95","country_code":"US","type":"education","lineage":["https://openalex.org/I219193219"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Ruqi Zhang","raw_affiliation_strings":["Purdue University"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Purdue University","institution_ids":["https://openalex.org/I219193219"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5068433690","display_name":"Dongkuan Xu","orcid":"https://orcid.org/0000-0002-1456-9658"},"institutions":[{"id":"https://openalex.org/I137902535","display_name":"North Carolina State University","ror":"https://ror.org/04tj63d06","country_code":"US","type":"education","lineage":["https://openalex.org/I137902535"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Dongkuan (DK) Xu","raw_affiliation_strings":["North Carolina State University"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"North Carolina State University","institution_ids":["https://openalex.org/I137902535"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.6004,"has_fulltext":true,"cited_by_count":1,"citation_normalized_percentile":{"value":0.55128205,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":95,"max":97},"biblio":{"volume":"39","issue":"23","first_page":"24668","last_page":"24676"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9743000268936157,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9743000268936157,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/decoding-methods","display_name":"Decoding methods","score":0.6914851665496826},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6584814190864563},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.17990776896476746}],"concepts":[{"id":"https://openalex.org/C57273362","wikidata":"https://www.wikidata.org/wiki/Q576722","display_name":"Decoding methods","level":2,"score":0.6914851665496826},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6584814190864563},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.17990776896476746}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1609/aaai.v39i23.34647","is_oa":true,"landing_page_url":"https://doi.org/10.1609/aaai.v39i23.34647","pdf_url":"https://ojs.aaai.org/index.php/AAAI/article/download/34647/36802","source":{"id":"https://openalex.org/S4210191458","display_name":"Proceedings of the AAAI Conference on Artificial Intelligence","issn_l":"2159-5399","issn":["2159-5399","2374-3468"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/P4310320058","host_organization_name":"Association for the Advancement of Artificial Intelligence","host_organization_lineage":["https://openalex.org/P4310320058"],"host_organization_lineage_names":["Association for the Advancement of Artificial Intelligence"],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the AAAI Conference on Artificial Intelligence","raw_type":"journal-article"},{"id":"pmh:oai:ojs.aaai.org:article/34647","is_oa":false,"landing_page_url":"https://ojs.aaai.org/index.php/AAAI/article/view/34647","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"2159-5399","raw_type":"info:eu-repo/semantics/article"}],"best_oa_location":{"id":"doi:10.1609/aaai.v39i23.34647","is_oa":true,"landing_page_url":"https://doi.org/10.1609/aaai.v39i23.34647","pdf_url":"https://ojs.aaai.org/index.php/AAAI/article/download/34647/36802","source":{"id":"https://openalex.org/S4210191458","display_name":"Proceedings of the AAAI Conference on Artificial Intelligence","issn_l":"2159-5399","issn":["2159-5399","2374-3468"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/P4310320058","host_organization_name":"Association for the Advancement of Artificial Intelligence","host_organization_lineage":["https://openalex.org/P4310320058"],"host_organization_lineage_names":["Association for the Advancement of Artificial Intelligence"],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the AAAI Conference on Artificial Intelligence","raw_type":"journal-article"},"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G3348058117","display_name":"Harnessing links between historical business and household microdata and street-view images to assess transit-induced neighborhood changes at small spatial scales","funder_award_id":"2416846","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"},{"id":"https://openalex.org/G8809419540","display_name":"Collaborative Research: CyberTraining: Implementation: Medium: EcoTern: Pioneering a CI Workforce for Sustainable and Transdisciplinary Environmental Science Research","funder_award_id":"2417850","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"}],"funders":[{"id":"https://openalex.org/F4320306076","display_name":"National Science Foundation","ror":"https://ror.org/021nxhr62"}],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4409348259.pdf","grobid_xml":"https://content.openalex.org/works/W4409348259.grobid-xml"},"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W4391375266","https://openalex.org/W2899084033","https://openalex.org/W2748952813","https://openalex.org/W2390279801","https://openalex.org/W4391913857","https://openalex.org/W2358668433","https://openalex.org/W4396701345","https://openalex.org/W2376932109","https://openalex.org/W2001405890","https://openalex.org/W4396696052"],"abstract_inverted_index":{"Large":[0],"language":[1],"model":[2,42,159],"(LLM)":[3],"decoding":[4,33,65,123,168,240],"involves":[5,129],"generating":[6],"a":[7,13,23,36,115,143,173,252],"sequence":[8],"of":[9,63,154,198,203,255],"tokens":[10],"based":[11],"on":[12,86,226],"given":[14],"context,":[15],"where":[16],"each":[17,44],"token":[18,45,164],"is":[19,48,82],"predicted":[20],"one":[21],"at":[22],"time":[24,137],"using":[25],"the":[26,41,94,151,155,158,167,185,194,199,212,239],"model's":[27],"learned":[28],"probabilities.":[29],"The":[30,60,201],"typical":[31],"autoregressive":[32],"method":[34],"requires":[35],"separate":[37],"forward":[38],"pass":[39],"through":[40],"for":[43,54,93,249],"generated,":[46],"which":[47,81,97,120],"computationally":[49],"inefficient":[50],"and":[51,71,100,107,181,191,219,230],"poses":[52],"challenges":[53],"deploying":[55],"LLMs":[56],"in":[57,207,251],"latency-sensitive":[58],"scenarios.":[59],"main":[61],"limitations":[62],"current":[64],"methods":[66],"stem":[67],"from":[68],"their":[69],"inefficiencies":[70],"resource":[72],"demands.":[73],"Existing":[74],"approaches":[75],"either":[76],"necessitate":[77],"fine-tuning":[78],"smaller":[79],"models,":[80],"resource-intensive,":[83],"or":[84],"relying":[85],"fixed":[87],"retrieval":[88],"schemes":[89],"to":[90,102,138,148,160,162,193,210,217],"construct":[91],"drafts":[92,186],"next":[95],"tokens,":[96],"lack":[98],"adaptability":[99],"fail":[101],"generalize":[103],"across":[104],"different":[105],"models":[106],"contexts.":[108],"To":[109],"address":[110],"these":[111],"issues,":[112],"we":[113,171,233],"introduce":[114],"novel":[116],"methodology":[117],"called":[118],"Adaptix,":[119],"accelerates":[121,238],"LLM":[122,146,231],"without":[124],"requiring":[125],"fine-tuning.":[126],"Our":[127],"approach":[128],"an":[130],"adaptive":[131],"draft-verification":[132],"process":[133,241],"that":[134,177,184,235],"evolves":[135],"over":[136],"improve":[139],"efficiency.":[140],"We":[141],"utilize":[142],"tri-gram":[144],"matrix-based":[145],"representation":[147],"dynamically":[149],"approximate":[150],"output":[152,196],"distribution":[153,197,214],"LLM,":[156],"allowing":[157],"adjust":[161],"changing":[163],"probabilities":[165],"during":[166],"process.":[169],"Additionally,":[170],"implement":[172],"draft":[174,213],"construction":[175],"mechanism":[176],"effectively":[178],"balances":[179],"exploration":[180],"exploitation,":[182],"ensuring":[183],"generated":[187],"are":[188],"both":[189],"diverse":[190],"close":[192],"true":[195],"LLM.":[200],"importance":[202],"this":[204],"design":[205],"lies":[206],"its":[208],"ability":[209],"optimize":[211],"adaptively,":[215],"leading":[216],"faster":[218],"more":[220],"accurate":[221],"decoding.":[222],"Through":[223],"extensive":[224],"experiments":[225],"various":[227],"benchmark":[228],"datasets":[229],"architectures,":[232],"demonstrate":[234],"Adaptix":[236],"significantly":[237],"while":[242],"maintaining":[243],"high":[244],"accuracy,":[245],"making":[246],"it":[247],"suitable":[248],"deployment":[250],"wide":[253],"range":[254],"practical":[256],"applications.":[257]},"counts_by_year":[{"year":2026,"cited_by_count":1}],"updated_date":"2026-07-02T09:51:11.867554","created_date":"2025-10-10T00:00:00"}
