{"id":"https://openalex.org/W7162119876","doi":"https://doi.org/10.48550/arxiv.2605.21649","title":"EntmaxKV: Support-Aware Decoding for Entmax Attention","display_name":"EntmaxKV: Support-Aware Decoding for Entmax Attention","publication_year":2026,"publication_date":"2026-05-20","ids":{"openalex":"https://openalex.org/W7162119876","doi":"https://doi.org/10.48550/arxiv.2605.21649"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2605.21649","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2605.21649","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2605.21649","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5136760150","display_name":"Gon\u00e7alo Duarte","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Duarte, Gon\u00e7alo","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5066732273","display_name":"Miguel Couceiro","orcid":"https://orcid.org/0000-0003-2316-7623"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Couceiro, Miguel","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5136475772","display_name":"Marcos V. Treviso","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Treviso, Marcos V.","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":3,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12326","display_name":"Network Packet Processing and Optimization","score":0.2775000035762787,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T12326","display_name":"Network Packet Processing and Optimization","score":0.2775000035762787,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11478","display_name":"Caching and Content Delivery","score":0.16189999878406525,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.12330000102519989,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/decoding-methods","display_name":"Decoding methods","score":0.8830999732017517},{"id":"https://openalex.org/keywords/context","display_name":"Context (archaeology)","score":0.6036999821662903},{"id":"https://openalex.org/keywords/cache","display_name":"Cache","score":0.5892999768257141},{"id":"https://openalex.org/keywords/sequential-decoding","display_name":"Sequential decoding","score":0.5569999814033508},{"id":"https://openalex.org/keywords/list-decoding","display_name":"List decoding","score":0.4943000078201294},{"id":"https://openalex.org/keywords/security-token","display_name":"Security token","score":0.47130000591278076},{"id":"https://openalex.org/keywords/inference","display_name":"Inference","score":0.4147999882698059},{"id":"https://openalex.org/keywords/fraction","display_name":"Fraction (chemistry)","score":0.41119998693466187}],"concepts":[{"id":"https://openalex.org/C57273362","wikidata":"https://www.wikidata.org/wiki/Q576722","display_name":"Decoding methods","level":2,"score":0.8830999732017517},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7215999960899353},{"id":"https://openalex.org/C2779343474","wikidata":"https://www.wikidata.org/wiki/Q3109175","display_name":"Context (archaeology)","level":2,"score":0.6036999821662903},{"id":"https://openalex.org/C115537543","wikidata":"https://www.wikidata.org/wiki/Q165596","display_name":"Cache","level":2,"score":0.5892999768257141},{"id":"https://openalex.org/C193969084","wikidata":"https://www.wikidata.org/wiki/Q7452500","display_name":"Sequential decoding","level":4,"score":0.5569999814033508},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.5175999999046326},{"id":"https://openalex.org/C204397858","wikidata":"https://www.wikidata.org/wiki/Q4437907","display_name":"List decoding","level":5,"score":0.4943000078201294},{"id":"https://openalex.org/C48145219","wikidata":"https://www.wikidata.org/wiki/Q1335365","display_name":"Security token","level":2,"score":0.47130000591278076},{"id":"https://openalex.org/C2776214188","wikidata":"https://www.wikidata.org/wiki/Q408386","display_name":"Inference","level":2,"score":0.4147999882698059},{"id":"https://openalex.org/C149629883","wikidata":"https://www.wikidata.org/wiki/Q660926","display_name":"Fraction (chemistry)","level":2,"score":0.41119998693466187},{"id":"https://openalex.org/C68339613","wikidata":"https://www.wikidata.org/wiki/Q1549489","display_name":"Speedup","level":2,"score":0.40299999713897705},{"id":"https://openalex.org/C103088060","wikidata":"https://www.wikidata.org/wiki/Q1062839","display_name":"Error detection and correction","level":2,"score":0.39309999346733093},{"id":"https://openalex.org/C106195933","wikidata":"https://www.wikidata.org/wiki/Q7847935","display_name":"Truncation (statistics)","level":2,"score":0.36550000309944153},{"id":"https://openalex.org/C125411270","wikidata":"https://www.wikidata.org/wiki/Q18653","display_name":"Encoding (memory)","level":2,"score":0.3472000062465668},{"id":"https://openalex.org/C2776760102","wikidata":"https://www.wikidata.org/wiki/Q5139990","display_name":"Code (set theory)","level":3,"score":0.34689998626708984},{"id":"https://openalex.org/C188441871","wikidata":"https://www.wikidata.org/wiki/Q7554146","display_name":"Softmax function","level":3,"score":0.2957000136375427},{"id":"https://openalex.org/C80444323","wikidata":"https://www.wikidata.org/wiki/Q2878974","display_name":"Theoretical computer science","level":1,"score":0.2903999984264374},{"id":"https://openalex.org/C56372850","wikidata":"https://www.wikidata.org/wiki/Q1050404","display_name":"Sparse matrix","level":3,"score":0.2856000065803528},{"id":"https://openalex.org/C38556500","wikidata":"https://www.wikidata.org/wiki/Q13404475","display_name":"Cache algorithms","level":4,"score":0.2800000011920929},{"id":"https://openalex.org/C137293760","wikidata":"https://www.wikidata.org/wiki/Q3621696","display_name":"Language model","level":2,"score":0.262800008058548},{"id":"https://openalex.org/C2777472644","wikidata":"https://www.wikidata.org/wiki/Q16968992","display_name":"Approximate inference","level":3,"score":0.26260000467300415},{"id":"https://openalex.org/C165696696","wikidata":"https://www.wikidata.org/wiki/Q11287","display_name":"Exploit","level":2,"score":0.2549000084400177},{"id":"https://openalex.org/C2777317252","wikidata":"https://www.wikidata.org/wiki/Q18393516","display_name":"Rare events","level":2,"score":0.2540999948978424}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2605.21649","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2605.21649","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2605.21649","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2605.21649","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Long-context":[0],"decoding":[1,26,62,78,94,118,208],"is":[2,107,154,164],"increasingly":[3],"limited":[4],"by":[5,31,156],"KV-cache":[6],"memory":[7],"traffic":[8],"since":[9],"each":[10],"generated":[11],"token":[12],"attends":[13],"over":[14,242],"a":[15,169,226],"cache":[16,104],"whose":[17,44],"size":[18],"grows":[19],"linearly":[20],"with":[21],"context":[22,248],"length.":[23,249],"Existing":[24],"sparse":[25,61,77,117,137,207],"methods":[27],"reduce":[28],"this":[29,110],"cost":[30],"selecting":[32],"subsets":[33],"of":[34,229],"tokens":[35],"or":[36],"pages,":[37],"but":[38],"are":[39,126],"designed":[40],"for":[41],"softmax":[42],"attention,":[43],"dense":[45,97],"tails":[46],"make":[47],"any":[48],"truncation":[49,142],"discard":[50],"nonzero":[51],"probability":[52,147,194],"mass.":[53],"In":[54,109],"contrast,":[55],"$\u03b1$-entmax":[56],"produces":[57],"exact":[58],"zeros,":[59],"turning":[60],"from":[63,178],"dense-tail":[64],"approximation":[65],"into":[66],"support":[67,163,198],"recovery:":[68],"if":[69],"the":[70,74,92,101,145,161,175,183,187,230],"selected":[71,184],"candidates":[72],"contain":[73],"entmax":[75,83,138,162,171,176,223],"support,":[76],"remains":[79],"exact.":[80],"While":[81],"recent":[82],"kernels":[84],"enable":[85],"efficient":[86],"training,":[87],"they":[88],"do":[89],"not":[90],"address":[91],"autoregressive":[93],"bottleneck,":[95],"where":[96],"inference":[98],"still":[99],"streams":[100],"full":[102,243],"KV":[103,124,211,231],"before":[105,123],"sparsity":[106,122],"known.":[108],"work,":[111],"we":[112],"introduce":[113,168],"EntmaxKV,":[114],"an":[115],"entmax-native":[116],"framework":[119],"that":[120,151,173],"exploits":[121],"pages":[125],"loaded.":[127],"EntmaxKV":[128,191],"combines":[129],"query-aware":[130],"page":[131,180],"scoring,":[132],"support-aware":[133],"candidate":[134],"selection,":[135],"and":[136,158,200,215,238],"attention.":[139],"We":[140,166],"analyze":[141],"error":[143,153,204],"through":[144],"dropped":[146],"mass":[148],"$\u03b4$,":[149],"showing":[150],"output":[152,203],"controlled":[155],"$\u03b4$":[157],"vanishes":[159],"when":[160],"recovered.":[165],"further":[167],"Gaussian-aware":[170],"selector":[172],"estimates":[174],"threshold":[177],"lightweight":[179],"statistics,":[181],"adapting":[182],"budget":[185],"to":[186,235],"score":[188],"distribution.":[189],"Empirically,":[190],"drops":[192],"less":[193],"mass,":[195],"retains":[196],"more":[197],"tokens,":[199],"achieves":[201],"lower":[202],"than":[205],"softmax-based":[206],"at":[209,246],"matched":[210],"budgets.":[212],"On":[213],"long-context":[214],"language":[216],"modeling":[217],"benchmarks,":[218],"it":[219],"closely":[220],"matches":[221],"full-cache":[222],"while":[224],"using":[225],"small":[227],"fraction":[228],"cache,":[232],"achieving":[233],"up":[234],"$3.36\\times$":[236],"(softmax)":[237],"$5.43\\times$":[239],"(entmax)":[240],"speedup":[241],"attention":[244],"baselines":[245],"1M":[247],"Code":[250],"available":[251],"at:":[252],"https://github.com/deep-spin/entmaxkv.":[253]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2026-05-23T00:00:00"}
