{"id":"https://openalex.org/W7134922491","doi":"https://doi.org/10.48550/arxiv.2603.09065","title":"Learning Adaptive LLM Decoding","display_name":"Learning Adaptive LLM Decoding","publication_year":2026,"publication_date":"2026-03-10","ids":{"openalex":"https://openalex.org/W7134922491","doi":"https://doi.org/10.48550/arxiv.2603.09065"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2603.09065","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.09065","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2603.09065","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5111246507","display_name":"Chloe Su","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Su, Chloe H.","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101518717","display_name":"Zhe Ye","orcid":"https://orcid.org/0009-0001-5608-5109"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Ye, Zhe","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5128799679","display_name":"Samuel Tenka","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Tenka, Samuel","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5124106437","display_name":"Aidan Z.H. Yang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yang, Aidan","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5015683413","display_name":"Soonho Kong","orcid":"https://orcid.org/0000-0003-0984-8078"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Kong, Soonho","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5017544719","display_name":"Udaya Ghai","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Ghai, Udaya","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5111246507"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.33500000834465027,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.33500000834465027,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.12139999866485596,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12535","display_name":"Machine Learning and Data Classification","score":0.10029999911785126,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/decoding-methods","display_name":"Decoding methods","score":0.7748000025749207},{"id":"https://openalex.org/keywords/security-token","display_name":"Security token","score":0.6949999928474426},{"id":"https://openalex.org/keywords/partially-observable-markov-decision-process","display_name":"Partially observable Markov decision process","score":0.43560001254081726},{"id":"https://openalex.org/keywords/sampling","display_name":"Sampling (signal processing)","score":0.41819998621940613},{"id":"https://openalex.org/keywords/coding","display_name":"Coding (social sciences)","score":0.4081999957561493},{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.39910000562667847},{"id":"https://openalex.org/keywords/list-decoding","display_name":"List decoding","score":0.39640000462532043},{"id":"https://openalex.org/keywords/correctness","display_name":"Correctness","score":0.37770000100135803},{"id":"https://openalex.org/keywords/inference","display_name":"Inference","score":0.3441999852657318}],"concepts":[{"id":"https://openalex.org/C57273362","wikidata":"https://www.wikidata.org/wiki/Q576722","display_name":"Decoding methods","level":2,"score":0.7748000025749207},{"id":"https://openalex.org/C48145219","wikidata":"https://www.wikidata.org/wiki/Q1335365","display_name":"Security token","level":2,"score":0.6949999928474426},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6873000264167786},{"id":"https://openalex.org/C17098449","wikidata":"https://www.wikidata.org/wiki/Q176814","display_name":"Partially observable Markov decision process","level":4,"score":0.43560001254081726},{"id":"https://openalex.org/C140779682","wikidata":"https://www.wikidata.org/wiki/Q210868","display_name":"Sampling (signal processing)","level":3,"score":0.41819998621940613},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.41620001196861267},{"id":"https://openalex.org/C179518139","wikidata":"https://www.wikidata.org/wiki/Q5140297","display_name":"Coding (social sciences)","level":2,"score":0.4081999957561493},{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.39910000562667847},{"id":"https://openalex.org/C204397858","wikidata":"https://www.wikidata.org/wiki/Q4437907","display_name":"List decoding","level":5,"score":0.39640000462532043},{"id":"https://openalex.org/C55439883","wikidata":"https://www.wikidata.org/wiki/Q360812","display_name":"Correctness","level":2,"score":0.37770000100135803},{"id":"https://openalex.org/C2776214188","wikidata":"https://www.wikidata.org/wiki/Q408386","display_name":"Inference","level":2,"score":0.3441999852657318},{"id":"https://openalex.org/C193969084","wikidata":"https://www.wikidata.org/wiki/Q7452500","display_name":"Sequential decoding","level":4,"score":0.3434000015258789},{"id":"https://openalex.org/C137293760","wikidata":"https://www.wikidata.org/wiki/Q3621696","display_name":"Language model","level":2,"score":0.3384000062942505},{"id":"https://openalex.org/C52740198","wikidata":"https://www.wikidata.org/wiki/Q1539564","display_name":"Importance sampling","level":3,"score":0.3287999927997589},{"id":"https://openalex.org/C80444323","wikidata":"https://www.wikidata.org/wiki/Q2878974","display_name":"Theoretical computer science","level":1,"score":0.3188000023365021},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.3176000118255615},{"id":"https://openalex.org/C159886148","wikidata":"https://www.wikidata.org/wiki/Q176645","display_name":"Markov process","level":2,"score":0.30090001225471497},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.3000999987125397},{"id":"https://openalex.org/C103088060","wikidata":"https://www.wikidata.org/wiki/Q1062839","display_name":"Error detection and correction","level":2,"score":0.2915000021457672},{"id":"https://openalex.org/C106189395","wikidata":"https://www.wikidata.org/wiki/Q176789","display_name":"Markov decision process","level":3,"score":0.2897000014781952},{"id":"https://openalex.org/C98763669","wikidata":"https://www.wikidata.org/wiki/Q176645","display_name":"Markov chain","level":2,"score":0.2896000146865845},{"id":"https://openalex.org/C2778858076","wikidata":"https://www.wikidata.org/wiki/Q5249539","display_name":"Decodes","level":3,"score":0.2865999937057495},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.2840999960899353},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.2793000042438507},{"id":"https://openalex.org/C23224414","wikidata":"https://www.wikidata.org/wiki/Q176769","display_name":"Hidden Markov model","level":2,"score":0.2759000062942505},{"id":"https://openalex.org/C167966045","wikidata":"https://www.wikidata.org/wiki/Q5532625","display_name":"Generative model","level":3,"score":0.26809999346733093},{"id":"https://openalex.org/C63479239","wikidata":"https://www.wikidata.org/wiki/Q7353546","display_name":"Robustness (evolution)","level":3,"score":0.25870001316070557},{"id":"https://openalex.org/C175154964","wikidata":"https://www.wikidata.org/wiki/Q380077","display_name":"Task analysis","level":3,"score":0.25429999828338623},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.2540000081062317}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2603.09065","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.09065","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2603.09065","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.09065","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Decoding":[0],"from":[1],"large":[2],"language":[3,53],"models":[4],"(LLMs)":[5],"typically":[6],"relies":[7],"on":[8,45,71,102,137,147,162],"fixed":[9,181,192],"sampling":[10,39,109,130],"hyperparameters":[11],"(e.g.,":[12],"temperature,":[13],"top-p),":[14],"despite":[15],"substantial":[16],"variation":[17],"in":[18],"task":[19],"difficulty":[20],"and":[21,25,65,73,106,141,150,203],"uncertainty":[22],"across":[23],"prompts":[24],"individual":[26],"decoding":[27,34,59,82,92,117],"steps.":[28],"We":[29],"propose":[30],"to":[31,172],"learn":[32],"adaptive":[33],"policies":[35],"that":[36,154],"dynamically":[37],"select":[38],"strategies":[40],"at":[41,132],"inference":[42],"time,":[43],"conditioned":[44,101],"available":[46],"compute":[47],"resources.":[48],"Rather":[49],"than":[50],"fine-tuning":[51],"the":[52,77,103,112,142,148,155,159,164,175,185,198],"model":[54,116,139],"itself,":[55],"we":[56,80,115],"introduce":[57],"lightweight":[58],"adapters":[60,157],"trained":[61],"with":[62],"reinforcement":[63],"learning":[64],"verifiable":[66],"terminal":[67],"rewards":[68],"(e.g.":[69,94],"correctness":[70],"math":[72],"coding":[74],"tasks).":[75],"At":[76,111],"sequence":[78],"level,":[79,114],"frame":[81],"as":[83,118],"a":[84,88,91,107,119,127,180],"contextual":[85],"bandit":[86],"problem:":[87],"policy":[89,128],"selects":[90,129],"strategy":[93],"greedy,":[95],"top-k,":[96],"min-p)":[97],"for":[98],"each":[99,133],"prompt,":[100],"prompt":[104],"embedding":[105],"parallel":[108,193],"budget.":[110,145],"token":[113,134,144,182],"partially":[120],"observable":[121],"Markov":[122],"decision":[123],"process":[124],"(POMDP),":[125],"where":[126],"actions":[131],"step":[135],"based":[136],"internal":[138],"features":[140],"remaining":[143],"Experiments":[146],"MATH":[149],"CodeContests":[151],"benchmarks":[152],"show":[153],"learned":[156],"improve":[158],"accuracy-budget":[160],"tradeoff:":[161],"MATH,":[163],"token-level":[165,204],"adapter":[166,187],"improves":[167],"Pass@1":[168],"accuracy":[169],"by":[170],"up":[171],"10.2%":[173],"over":[174],"best":[176],"static":[177],"baseline":[178],"under":[179,191],"budget,":[183],"while":[184],"sequence-level":[186],"yields":[188],"2-3%":[189],"gains":[190],"sampling.":[194],"Ablation":[195],"analyses":[196],"support":[197],"contribution":[199],"of":[200],"both":[201],"sequence-":[202],"adaptation.":[205]},"counts_by_year":[],"updated_date":"2026-03-12T06:18:43.230356","created_date":"2026-03-12T00:00:00"}
