{"id":"https://openalex.org/W7156107761","doi":"https://doi.org/10.48550/arxiv.2604.22312","title":"Guess-Verify-Refine: Data-Aware Top-K for Sparse-Attention Decoding on Blackwell via Temporal Correlation","display_name":"Guess-Verify-Refine: Data-Aware Top-K for Sparse-Attention Decoding on Blackwell via Temporal Correlation","publication_year":2026,"publication_date":"2026-04-24","ids":{"openalex":"https://openalex.org/W7156107761","doi":"https://doi.org/10.48550/arxiv.2604.22312"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2604.22312","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.22312","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2604.22312","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5134662050","display_name":"Long Cheng","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Cheng, Long","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5033038201","display_name":"Ritchie Zhao","orcid":"https://orcid.org/0000-0003-1656-9165"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhao, Ritchie","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5134689551","display_name":"Timmy Liu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Liu, Timmy","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5034923984","display_name":"Mindy Li","orcid":"https://orcid.org/0000-0002-5957-7918"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Li, Mindy","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5134715686","display_name":"Xianjie Qiao","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Qiao, Xianjie","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5102901210","display_name":"Kefeng Duan","orcid":"https://orcid.org/0009-0001-6731-3349"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Duan, Kefeng","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5134678463","display_name":"Yu-Jung Chen","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Chen, Yu-Jung","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5134733657","display_name":"Xiaoming Chen","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Chen, Xiaoming","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5035073741","display_name":"Bita Darvish Rouhani","orcid":"https://orcid.org/0000-0002-8412-4320"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Rouhani, Bita Darvish","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5134725248","display_name":"June Yang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yang, June","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":10,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11612","display_name":"Stochastic Gradient Optimization Techniques","score":0.12880000472068787,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11612","display_name":"Stochastic Gradient Optimization Techniques","score":0.12880000472068787,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11478","display_name":"Caching and Content Delivery","score":0.11460000276565552,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.09839999675750732,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/decoding-methods","display_name":"Decoding methods","score":0.620199978351593},{"id":"https://openalex.org/keywords/bottleneck","display_name":"Bottleneck","score":0.6198999881744385},{"id":"https://openalex.org/keywords/toeplitz-matrix","display_name":"Toeplitz matrix","score":0.5899999737739563},{"id":"https://openalex.org/keywords/stack","display_name":"Stack (abstract data type)","score":0.4878999888896942},{"id":"https://openalex.org/keywords/selection","display_name":"Selection (genetic algorithm)","score":0.46860000491142273},{"id":"https://openalex.org/keywords/correlation","display_name":"Correlation","score":0.4496999979019165},{"id":"https://openalex.org/keywords/speedup","display_name":"Speedup","score":0.41940000653266907}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6832000017166138},{"id":"https://openalex.org/C57273362","wikidata":"https://www.wikidata.org/wiki/Q576722","display_name":"Decoding methods","level":2,"score":0.620199978351593},{"id":"https://openalex.org/C2780513914","wikidata":"https://www.wikidata.org/wiki/Q18210350","display_name":"Bottleneck","level":2,"score":0.6198999881744385},{"id":"https://openalex.org/C147710293","wikidata":"https://www.wikidata.org/wiki/Q849428","display_name":"Toeplitz matrix","level":2,"score":0.5899999737739563},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.5810999870300293},{"id":"https://openalex.org/C9395851","wikidata":"https://www.wikidata.org/wiki/Q177929","display_name":"Stack (abstract data type)","level":2,"score":0.4878999888896942},{"id":"https://openalex.org/C81917197","wikidata":"https://www.wikidata.org/wiki/Q628760","display_name":"Selection (genetic algorithm)","level":2,"score":0.46860000491142273},{"id":"https://openalex.org/C117220453","wikidata":"https://www.wikidata.org/wiki/Q5172842","display_name":"Correlation","level":2,"score":0.4496999979019165},{"id":"https://openalex.org/C68339613","wikidata":"https://www.wikidata.org/wiki/Q1549489","display_name":"Speedup","level":2,"score":0.41940000653266907},{"id":"https://openalex.org/C82876162","wikidata":"https://www.wikidata.org/wiki/Q17096504","display_name":"Latency (audio)","level":2,"score":0.37610000371932983},{"id":"https://openalex.org/C165696696","wikidata":"https://www.wikidata.org/wiki/Q11287","display_name":"Exploit","level":2,"score":0.36070001125335693},{"id":"https://openalex.org/C3826847","wikidata":"https://www.wikidata.org/wiki/Q188768","display_name":"FLOPS","level":2,"score":0.3418999910354614},{"id":"https://openalex.org/C80444323","wikidata":"https://www.wikidata.org/wiki/Q2878974","display_name":"Theoretical computer science","level":1,"score":0.33709999918937683},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.3093999922275543},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.26080000400543213},{"id":"https://openalex.org/C179799912","wikidata":"https://www.wikidata.org/wiki/Q205084","display_name":"Computational complexity theory","level":2,"score":0.25920000672340393},{"id":"https://openalex.org/C169590947","wikidata":"https://www.wikidata.org/wiki/Q47506","display_name":"Compiler","level":2,"score":0.25769999623298645}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2604.22312","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.22312","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2604.22312","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.22312","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Sparse-attention":[0],"decoders":[1,211],"rely":[2],"on":[3,59,129,202],"exact":[4,53,103],"Top-K":[5,23,54,75,159,214],"selection":[6,104],"to":[7,84,112,150,172,209],"choose":[8],"the":[9,38,72,113,127,144,197,204],"most":[10],"important":[11],"key-value":[12],"entries":[13],"for":[14,56],"each":[15],"query":[16,29],"token.":[17],"In":[18,161],"long-context":[19],"LLM":[20],"serving,":[21],"this":[22,110],"stage":[24],"runs":[25],"once":[26],"per":[27,152,154],"decode":[28,68],"and":[30,40,101,125,183,194],"becomes":[31],"a":[32,51,77,85,98],"meaningful":[33],"latency":[34],"bottleneck":[35],"even":[36],"when":[37],"indexer":[39,123],"attention":[41],"kernels":[42],"are":[43],"already":[44],"highly":[45],"optimized.":[46],"We":[47,108],"present":[48],"\\textbf{Guess-Verify-Refine":[49],"(GVR)},":[50],"data-aware":[52],"algorithm":[55],"sparse-attention":[57,210],"decoding":[58],"NVIDIA":[60],"Blackwell.":[61],"GVR":[62,136],"exploits":[63],"temporal":[64,216],"correlation":[65],"across":[66],"consecutive":[67],"steps:":[69],"it":[70,166],"uses":[71],"previous":[73],"step's":[74],"as":[76],"prediction":[78],"signal,":[79],"computes":[80],"pre-indexed":[81],"statistics,":[82],"narrows":[83],"valid":[86],"threshold":[87],"by":[88,170],"secant-style":[89],"counting":[90],"in":[91,105,196],"1-2":[92],"global":[93],"passes,":[94],"verifies":[95],"candidates":[96],"with":[97,148,177],"ballot-free":[99],"collector,":[100],"finishes":[102],"shared":[106],"memory.":[107],"connect":[109],"behavior":[111],"Toeplitz":[114],"/":[115],"RoPE":[116],"structure":[117],"of":[118],"DeepSeek":[119],"Sparse":[120],"Attention":[121],"(DSA)":[122],"scores":[124],"validate":[126],"design":[128],"real":[130],"DeepSeek-V3.2":[131],"workloads":[132],"integrated":[133],"into":[134],"TensorRT-LLM.":[135],"achieves":[137],"an":[138],"average":[139],"\\textbf{1.88x}":[140],"single-operator":[141],"speedup":[142],"over":[143],"production":[145],"radix-select":[146],"kernel,":[147],"up":[149,171],"\\textbf{2.42x}":[151],"layer":[153],"step,":[155],"while":[156],"preserving":[157],"bit-exact":[158],"outputs.":[160],"controlled":[162],"TEP8":[163],"min-latency":[164],"deployment,":[165],"improves":[167],"end-to-end":[168],"TPOT":[169],"\\textbf{7.52%}":[173],"at":[174,180],"100K":[175],"context,":[176],"larger":[178],"gains":[179,188],"longer":[181],"contexts":[182],"smaller":[184],"but":[185],"still":[186],"positive":[187],"under":[189],"speculative":[190],"decoding.":[191],"While":[192],"implemented":[193],"validated":[195],"current":[198],"TensorRT-LLM":[199],"DSA":[200],"stack":[201],"Blackwell,":[203],"same":[205],"principle":[206],"may":[207],"extend":[208],"whose":[212],"decode-phase":[213],"exhibits":[215],"stability.":[217]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2026-04-28T00:00:00"}
