{"id":"https://openalex.org/W7154698225","doi":"https://doi.org/10.48550/arxiv.2604.15244","title":"From Tokens to Steps: Verification-Aware Speculative Decoding for Efficient Multi-Step Reasoning","display_name":"From Tokens to Steps: Verification-Aware Speculative Decoding for Efficient Multi-Step Reasoning","publication_year":2026,"publication_date":"2026-04-16","ids":{"openalex":"https://openalex.org/W7154698225","doi":"https://doi.org/10.48550/arxiv.2604.15244"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2604.15244","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.15244","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2604.15244","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5133844913","display_name":"Kiran Purohit","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Purohit, Kiran","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5071999741","display_name":"Ramasuri Narayanam","orcid":"https://orcid.org/0000-0003-3289-3950"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Narayanam, Ramasuri","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5087774360","display_name":"Saheb Pal","orcid":"https://orcid.org/0000-0003-1867-2473"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Pal, Soumyabrata","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":3,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.2955999970436096,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.2955999970436096,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12026","display_name":"Explainable Artificial Intelligence (XAI)","score":0.15119999647140503,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.11599999666213989,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/decoding-methods","display_name":"Decoding methods","score":0.7264000177383423},{"id":"https://openalex.org/keywords/inference","display_name":"Inference","score":0.5444999933242798},{"id":"https://openalex.org/keywords/range","display_name":"Range (aeronautics)","score":0.4260999858379364},{"id":"https://openalex.org/keywords/latency","display_name":"Latency (audio)","score":0.39500001072883606},{"id":"https://openalex.org/keywords/language-model","display_name":"Language model","score":0.35899999737739563},{"id":"https://openalex.org/keywords/authorship-attribution","display_name":"Authorship attribution","score":0.35580000281333923}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7961999773979187},{"id":"https://openalex.org/C57273362","wikidata":"https://www.wikidata.org/wiki/Q576722","display_name":"Decoding methods","level":2,"score":0.7264000177383423},{"id":"https://openalex.org/C2776214188","wikidata":"https://www.wikidata.org/wiki/Q408386","display_name":"Inference","level":2,"score":0.5444999933242798},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.47450000047683716},{"id":"https://openalex.org/C204323151","wikidata":"https://www.wikidata.org/wiki/Q905424","display_name":"Range (aeronautics)","level":2,"score":0.4260999858379364},{"id":"https://openalex.org/C82876162","wikidata":"https://www.wikidata.org/wiki/Q17096504","display_name":"Latency (audio)","level":2,"score":0.39500001072883606},{"id":"https://openalex.org/C137293760","wikidata":"https://www.wikidata.org/wiki/Q3621696","display_name":"Language model","level":2,"score":0.35899999737739563},{"id":"https://openalex.org/C3020202489","wikidata":"https://www.wikidata.org/wiki/Q2032038","display_name":"Authorship attribution","level":2,"score":0.35580000281333923},{"id":"https://openalex.org/C151201525","wikidata":"https://www.wikidata.org/wiki/Q177239","display_name":"Limit (mathematics)","level":2,"score":0.35350000858306885},{"id":"https://openalex.org/C184337299","wikidata":"https://www.wikidata.org/wiki/Q1437428","display_name":"Semantics (computer science)","level":2,"score":0.32170000672340393},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.31369999051094055},{"id":"https://openalex.org/C179799912","wikidata":"https://www.wikidata.org/wiki/Q205084","display_name":"Computational complexity theory","level":2,"score":0.31200000643730164},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.2985000014305115},{"id":"https://openalex.org/C46637626","wikidata":"https://www.wikidata.org/wiki/Q6693015","display_name":"Low latency (capital markets)","level":2,"score":0.27480000257492065},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.272599995136261},{"id":"https://openalex.org/C125411270","wikidata":"https://www.wikidata.org/wiki/Q18653","display_name":"Encoding (memory)","level":2,"score":0.2572000026702881},{"id":"https://openalex.org/C108650721","wikidata":"https://www.wikidata.org/wiki/Q1783253","display_name":"Counterfactual thinking","level":2,"score":0.25529998540878296},{"id":"https://openalex.org/C80444323","wikidata":"https://www.wikidata.org/wiki/Q2878974","display_name":"Theoretical computer science","level":1,"score":0.25130000710487366}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2604.15244","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.15244","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2604.15244","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.15244","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Speculative":[0],"decoding":[1,55],"(SD)":[2],"accelerates":[3],"large":[4],"language":[5],"model":[6,13,21],"inference":[7],"by":[8,144,149],"allowing":[9],"a":[10,18,52,108,120,134],"lightweight":[11,88],"draft":[12,71],"to":[14,30,99],"propose":[15,50],"outputs":[16],"that":[17,57,96,111,140],"stronger":[19],"target":[20],"verifies.":[22],"However,":[23],"its":[24],"token-centric":[25],"nature":[26],"allows":[27],"erroneous":[28],"steps":[29],"propagate.":[31],"Prior":[32],"approaches":[33],"mitigate":[34],"this":[35],"using":[36,61,83,126],"external":[37],"reward":[38],"models,":[39],"but":[40],"incur":[41],"additional":[42],"latency,":[43],"computational":[44],"overhead,":[45],"and":[46,73,102,106,154],"limit":[47],"generalizability.":[48],"We":[49],"SpecGuard,":[51],"verification-aware":[53],"speculative":[54],"framework":[56],"performs":[58],"step-level":[59],"verification":[60],"only":[62],"model-internal":[63,89],"signals.":[64],"At":[65],"each":[66],"step,":[67,78],"SpecGuard":[68,141],"samples":[69],"multiple":[70],"candidates":[72],"selects":[74],"the":[75,100,127],"most":[76],"consistent":[77],"which":[79],"is":[80,122],"then":[81],"validated":[82],"an":[84,92],"ensemble":[85],"of":[86,136],"two":[87],"signals:":[90],"(i)":[91],"attention-based":[93],"grounding":[94],"score":[95,110],"measures":[97],"attribution":[98],"input":[101],"previously":[103],"accepted":[104,123],"steps,":[105],"(ii)":[107],"log-probability-based":[109],"captures":[112],"token-level":[113],"confidence.":[114],"These":[115],"signals":[116],"jointly":[117],"determine":[118],"whether":[119],"step":[121],"or":[124],"recomputed":[125],"target,":[128],"allocating":[129],"compute":[130],"selectively.":[131],"Experiments":[132],"across":[133],"range":[135],"reasoning":[137],"benchmarks":[138],"show":[139],"improves":[142],"accuracy":[143],"3.6%":[145],"while":[146],"reducing":[147],"latency":[148],"~11%,":[150],"outperforming":[151],"both":[152],"SD":[153],"reward-guided":[155],"SD.":[156]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2026-04-18T00:00:00"}
