{"id":"https://openalex.org/W7137951082","doi":"https://doi.org/10.1609/aaai.v40i36.40255","title":"Steering Pretrained Drafters During Speculative Decoding","display_name":"Steering Pretrained Drafters During Speculative Decoding","publication_year":2026,"publication_date":"2026-03-14","ids":{"openalex":"https://openalex.org/W7137951082","doi":"https://doi.org/10.1609/aaai.v40i36.40255"},"language":null,"primary_location":{"id":"doi:10.1609/aaai.v40i36.40255","is_oa":true,"landing_page_url":"https://doi.org/10.1609/aaai.v40i36.40255","pdf_url":"https://ojs.aaai.org/index.php/AAAI/article/download/40255/44216","source":{"id":"https://openalex.org/S4210191458","display_name":"Proceedings of the AAAI Conference on Artificial Intelligence","issn_l":"2159-5399","issn":["2159-5399","2374-3468"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/P4310320058","host_organization_name":"Association for the Advancement of Artificial Intelligence","host_organization_lineage":["https://openalex.org/P4310320058"],"host_organization_lineage_names":["Association for the Advancement of Artificial Intelligence"],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the AAAI Conference on Artificial Intelligence","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"diamond","oa_url":"https://ojs.aaai.org/index.php/AAAI/article/download/40255/44216","any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5129673974","display_name":"Fr\u00e9d\u00e9ric Berdoz","orcid":null},"institutions":[{"id":"https://openalex.org/I35440088","display_name":"ETH Zurich","ror":"https://ror.org/05a28rw58","country_code":"CH","type":"education","lineage":["https://openalex.org/I2799323385","https://openalex.org/I35440088"]}],"countries":["CH"],"is_corresponding":true,"raw_author_name":"Fr\u00e9d\u00e9ric Berdoz","raw_affiliation_strings":["ETH Zurich"],"affiliations":[{"raw_affiliation_string":"ETH Zurich","institution_ids":["https://openalex.org/I35440088"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5120414886","display_name":"Peer Rheinboldt","orcid":null},"institutions":[{"id":"https://openalex.org/I35440088","display_name":"ETH Zurich","ror":"https://ror.org/05a28rw58","country_code":"CH","type":"education","lineage":["https://openalex.org/I2799323385","https://openalex.org/I35440088"]}],"countries":["CH"],"is_corresponding":false,"raw_author_name":"Peer Rheinboldt","raw_affiliation_strings":["ETH Zurich"],"affiliations":[{"raw_affiliation_string":"ETH Zurich","institution_ids":["https://openalex.org/I35440088"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5129662793","display_name":"Roger Wattenhofer","orcid":null},"institutions":[{"id":"https://openalex.org/I35440088","display_name":"ETH Zurich","ror":"https://ror.org/05a28rw58","country_code":"CH","type":"education","lineage":["https://openalex.org/I2799323385","https://openalex.org/I35440088"]}],"countries":["CH"],"is_corresponding":false,"raw_author_name":"Roger Wattenhofer","raw_affiliation_strings":["ETH Zurich"],"affiliations":[{"raw_affiliation_string":"ETH Zurich","institution_ids":["https://openalex.org/I35440088"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5129673974"],"corresponding_institution_ids":["https://openalex.org/I35440088"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":true,"cited_by_count":0,"citation_normalized_percentile":{"value":0.1643469,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":"40","issue":"36","first_page":"30067","last_page":"30075"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.2540999948978424,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.2540999948978424,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10775","display_name":"Generative Adversarial Networks and Image Synthesis","score":0.09960000216960907,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.07769999653100967,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/security-token","display_name":"Security token","score":0.923799991607666},{"id":"https://openalex.org/keywords/decoding-methods","display_name":"Decoding methods","score":0.79830002784729},{"id":"https://openalex.org/keywords/latency","display_name":"Latency (audio)","score":0.6766999959945679},{"id":"https://openalex.org/keywords/inference","display_name":"Inference","score":0.5781000256538391},{"id":"https://openalex.org/keywords/language-model","display_name":"Language model","score":0.516700029373169},{"id":"https://openalex.org/keywords/scratch","display_name":"Scratch","score":0.448199987411499},{"id":"https://openalex.org/keywords/perspective","display_name":"Perspective (graphical)","score":0.4034000039100647}],"concepts":[{"id":"https://openalex.org/C48145219","wikidata":"https://www.wikidata.org/wiki/Q1335365","display_name":"Security token","level":2,"score":0.923799991607666},{"id":"https://openalex.org/C57273362","wikidata":"https://www.wikidata.org/wiki/Q576722","display_name":"Decoding methods","level":2,"score":0.79830002784729},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7820000052452087},{"id":"https://openalex.org/C82876162","wikidata":"https://www.wikidata.org/wiki/Q17096504","display_name":"Latency (audio)","level":2,"score":0.6766999959945679},{"id":"https://openalex.org/C2776214188","wikidata":"https://www.wikidata.org/wiki/Q408386","display_name":"Inference","level":2,"score":0.5781000256538391},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.551800012588501},{"id":"https://openalex.org/C137293760","wikidata":"https://www.wikidata.org/wiki/Q3621696","display_name":"Language model","level":2,"score":0.516700029373169},{"id":"https://openalex.org/C2781235140","wikidata":"https://www.wikidata.org/wiki/Q275131","display_name":"Scratch","level":2,"score":0.448199987411499},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4343999922275543},{"id":"https://openalex.org/C12713177","wikidata":"https://www.wikidata.org/wiki/Q1900281","display_name":"Perspective (graphical)","level":2,"score":0.4034000039100647},{"id":"https://openalex.org/C46637626","wikidata":"https://www.wikidata.org/wiki/Q6693015","display_name":"Low latency (capital markets)","level":2,"score":0.3569999933242798},{"id":"https://openalex.org/C79403827","wikidata":"https://www.wikidata.org/wiki/Q3988","display_name":"Real-time computing","level":1,"score":0.3449999988079071},{"id":"https://openalex.org/C51823790","wikidata":"https://www.wikidata.org/wiki/Q504353","display_name":"Greedy algorithm","level":2,"score":0.31610000133514404},{"id":"https://openalex.org/C118505674","wikidata":"https://www.wikidata.org/wiki/Q42586063","display_name":"Encoder","level":2,"score":0.3028999865055084},{"id":"https://openalex.org/C140779682","wikidata":"https://www.wikidata.org/wiki/Q210868","display_name":"Sampling (signal processing)","level":3,"score":0.29580000042915344},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.2865000069141388},{"id":"https://openalex.org/C179799912","wikidata":"https://www.wikidata.org/wiki/Q205084","display_name":"Computational complexity theory","level":2,"score":0.27149999141693115},{"id":"https://openalex.org/C175154964","wikidata":"https://www.wikidata.org/wiki/Q380077","display_name":"Task analysis","level":3,"score":0.25929999351501465},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.2542000114917755}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1609/aaai.v40i36.40255","is_oa":true,"landing_page_url":"https://doi.org/10.1609/aaai.v40i36.40255","pdf_url":"https://ojs.aaai.org/index.php/AAAI/article/download/40255/44216","source":{"id":"https://openalex.org/S4210191458","display_name":"Proceedings of the AAAI Conference on Artificial Intelligence","issn_l":"2159-5399","issn":["2159-5399","2374-3468"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/P4310320058","host_organization_name":"Association for the Advancement of Artificial Intelligence","host_organization_lineage":["https://openalex.org/P4310320058"],"host_organization_lineage_names":["Association for the Advancement of Artificial Intelligence"],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the AAAI Conference on Artificial Intelligence","raw_type":"journal-article"}],"best_oa_location":{"id":"doi:10.1609/aaai.v40i36.40255","is_oa":true,"landing_page_url":"https://doi.org/10.1609/aaai.v40i36.40255","pdf_url":"https://ojs.aaai.org/index.php/AAAI/article/download/40255/44216","source":{"id":"https://openalex.org/S4210191458","display_name":"Proceedings of the AAAI Conference on Artificial Intelligence","issn_l":"2159-5399","issn":["2159-5399","2374-3468"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/P4310320058","host_organization_name":"Association for the Advancement of Artificial Intelligence","host_organization_lineage":["https://openalex.org/P4310320058"],"host_organization_lineage_names":["Association for the Advancement of Artificial Intelligence"],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the AAAI Conference on Artificial Intelligence","raw_type":"journal-article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W7137951082.pdf","grobid_xml":"https://content.openalex.org/works/W7137951082.grobid-xml"},"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Speculative":[0],"decoding":[1],"accelerates":[2],"language":[3],"model":[4],"inference":[5],"by":[6,95,134],"separating":[7],"generation":[8,66],"into":[9,113],"fast":[10],"drafting":[11,31,72],"and":[12,25,111,141,161],"parallel":[13],"verification.":[14],"Its":[15],"main":[16],"limitation":[17],"is":[18,74],"drafter\u2013verifier":[19],"misalignment,":[20],"which":[21],"limits":[22],"token":[23],"acceptance":[24,60,90],"reduces":[26],"overall":[27],"effectiveness.":[28],"While":[29],"small":[30],"heads":[32],"trained":[33],"from":[34,106],"scratch":[35],"compensate":[36],"with":[37],"speed,":[38],"they":[39],"struggle":[40],"when":[41,46,71],"verification":[42,78],"dominates":[43],"latency":[44,73],"or":[45,79],"inputs":[47],"are":[48],"out":[49],"of":[50,92,131],"distribution.":[51],"In":[52,82],"contrast,":[53],"pretrained":[54,93,115,162],"drafters,":[55],"though":[56],"slower,":[57],"achieve":[58],"higher":[59],"rates":[61,91],"thanks":[62],"to":[63,77,87,118,136,158],"stronger":[64],"standalone":[65],"capabilities,":[67],"making":[68],"them":[69],"competitive":[70],"negligible":[75,149],"relative":[76],"communication":[80],"overhead.":[81,151],"this":[83],"work,":[84],"we":[85],"aim":[86],"improve":[88],"the":[89,107,114,129],"drafters":[94],"introducing":[96],"a":[97,102],"lightweight":[98],"dynamic":[99],"alignment":[100,121],"mechanism:":[101],"steering":[103],"vector":[104],"computed":[105],"verifier\u2019s":[108],"hidden":[109],"states":[110],"injected":[112],"drafter.":[116],"Compared":[117],"existing":[119,159],"offline":[120],"methods":[122],"such":[123],"as":[124],"distillation,":[125],"our":[126,153],"approach":[127,154],"boosts":[128],"number":[130],"accepted":[132],"tokens":[133],"up":[135],"35%":[137],"under":[138,143],"standard":[139],"sampling":[140],"22%":[142],"greedy":[144],"sampling,":[145],"all":[146],"while":[147],"incurring":[148],"computational":[150],"Importantly,":[152],"can":[155],"be":[156],"retrofitted":[157],"architectures":[160],"models,":[163],"enabling":[164],"rapid":[165],"adoption.":[166]},"counts_by_year":[],"updated_date":"2026-03-20T20:47:17.329874","created_date":"2026-03-18T00:00:00"}
