{"id":"https://openalex.org/W7133297553","doi":"https://doi.org/10.48550/arxiv.2603.00907","title":"KVSlimmer: Theoretical Insights and Practical Optimizations for Asymmetric KV Merging","display_name":"KVSlimmer: Theoretical Insights and Practical Optimizations for Asymmetric KV Merging","publication_year":2026,"publication_date":"2026-03-01","ids":{"openalex":"https://openalex.org/W7133297553","doi":"https://doi.org/10.48550/arxiv.2603.00907"},"language":null,"primary_location":{"id":"pmh:doi:10.48550/arxiv.2603.00907","is_oa":true,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S4406922384","display_name":"Open MIND","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"publisher-specific-oa","license_id":"https://openalex.org/licenses/publisher-specific-oa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":null,"any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5127962193","display_name":"Lianjun Liu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Liu, Lianjun","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5127958792","display_name":"Hongli An","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"An, Hongli","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5087857252","display_name":"Weiqi Yan","orcid":"https://orcid.org/0000-0002-7443-3285"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yan, Weiqi","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5127948813","display_name":"Xin Du","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Du, Xin","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5127946926","display_name":"Shengchuan Zhang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhang, Shengchuan","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5127940420","display_name":"Huazhong Liu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Liu, Huazhong","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5127925591","display_name":"Yunshan Zhong","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhong, Yunshan","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":7,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11478","display_name":"Caching and Content Delivery","score":0.1356000006198883,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11478","display_name":"Caching and Content Delivery","score":0.1356000006198883,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.12039999663829803,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T14347","display_name":"Big Data and Digital Economy","score":0.09030000120401382,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/hessian-matrix","display_name":"Hessian matrix","score":0.6746000051498413},{"id":"https://openalex.org/keywords/inference","display_name":"Inference","score":0.5504000186920166},{"id":"https://openalex.org/keywords/limit","display_name":"Limit (mathematics)","score":0.5152000188827515},{"id":"https://openalex.org/keywords/cache","display_name":"Cache","score":0.49790000915527344},{"id":"https://openalex.org/keywords/asymmetry","display_name":"Asymmetry","score":0.4803999960422516},{"id":"https://openalex.org/keywords/divergence","display_name":"Divergence (linguistics)","score":0.46860000491142273},{"id":"https://openalex.org/keywords/projection","display_name":"Projection (relational algebra)","score":0.4016000032424927},{"id":"https://openalex.org/keywords/feature","display_name":"Feature (linguistics)","score":0.38769999146461487}],"concepts":[{"id":"https://openalex.org/C203616005","wikidata":"https://www.wikidata.org/wiki/Q620495","display_name":"Hessian matrix","level":2,"score":0.6746000051498413},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.599399983882904},{"id":"https://openalex.org/C2776214188","wikidata":"https://www.wikidata.org/wiki/Q408386","display_name":"Inference","level":2,"score":0.5504000186920166},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.5440000295639038},{"id":"https://openalex.org/C151201525","wikidata":"https://www.wikidata.org/wiki/Q177239","display_name":"Limit (mathematics)","level":2,"score":0.5152000188827515},{"id":"https://openalex.org/C115537543","wikidata":"https://www.wikidata.org/wiki/Q165596","display_name":"Cache","level":2,"score":0.49790000915527344},{"id":"https://openalex.org/C38976095","wikidata":"https://www.wikidata.org/wiki/Q752641","display_name":"Asymmetry","level":2,"score":0.4803999960422516},{"id":"https://openalex.org/C207390915","wikidata":"https://www.wikidata.org/wiki/Q1230525","display_name":"Divergence (linguistics)","level":2,"score":0.46860000491142273},{"id":"https://openalex.org/C126255220","wikidata":"https://www.wikidata.org/wiki/Q141495","display_name":"Mathematical optimization","level":1,"score":0.40860000252723694},{"id":"https://openalex.org/C57493831","wikidata":"https://www.wikidata.org/wiki/Q3134666","display_name":"Projection (relational algebra)","level":2,"score":0.4016000032424927},{"id":"https://openalex.org/C2776401178","wikidata":"https://www.wikidata.org/wiki/Q12050496","display_name":"Feature (linguistics)","level":2,"score":0.38769999146461487},{"id":"https://openalex.org/C186370098","wikidata":"https://www.wikidata.org/wiki/Q442787","display_name":"Energy (signal processing)","level":2,"score":0.3797000050544739},{"id":"https://openalex.org/C82876162","wikidata":"https://www.wikidata.org/wiki/Q17096504","display_name":"Latency (audio)","level":2,"score":0.34630000591278076},{"id":"https://openalex.org/C2775937380","wikidata":"https://www.wikidata.org/wiki/Q1232589","display_name":"Replica","level":2,"score":0.33079999685287476},{"id":"https://openalex.org/C179799912","wikidata":"https://www.wikidata.org/wiki/Q205084","display_name":"Computational complexity theory","level":2,"score":0.32280001044273376},{"id":"https://openalex.org/C189783530","wikidata":"https://www.wikidata.org/wiki/Q352090","display_name":"CPU cache","level":3,"score":0.2928999960422516},{"id":"https://openalex.org/C28826006","wikidata":"https://www.wikidata.org/wiki/Q33521","display_name":"Applied mathematics","level":1,"score":0.28839999437332153},{"id":"https://openalex.org/C520416788","wikidata":"https://www.wikidata.org/wiki/Q5419229","display_name":"Exact solutions in general relativity","level":2,"score":0.2806999981403351},{"id":"https://openalex.org/C80444323","wikidata":"https://www.wikidata.org/wiki/Q2878974","display_name":"Theoretical computer science","level":1,"score":0.27790001034736633},{"id":"https://openalex.org/C78548338","wikidata":"https://www.wikidata.org/wiki/Q2493","display_name":"Data compression","level":2,"score":0.2766000032424927},{"id":"https://openalex.org/C152124472","wikidata":"https://www.wikidata.org/wiki/Q1204361","display_name":"Redundancy (engineering)","level":2,"score":0.2694999873638153},{"id":"https://openalex.org/C110121322","wikidata":"https://www.wikidata.org/wiki/Q865811","display_name":"Distribution (mathematics)","level":2,"score":0.26190000772476196}],"mesh":[],"locations_count":2,"locations":[{"id":"pmh:doi:10.48550/arxiv.2603.00907","is_oa":true,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S4406922384","display_name":"Open MIND","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"publisher-specific-oa","license_id":"https://openalex.org/licenses/publisher-specific-oa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Article"},{"id":"doi:10.48550/arxiv.2603.00907","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.00907","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"pmh:doi:10.48550/arxiv.2603.00907","is_oa":true,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S4406922384","display_name":"Open MIND","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"publisher-specific-oa","license_id":"https://openalex.org/licenses/publisher-specific-oa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Article"},"sustainable_development_goals":[{"score":0.767831027507782,"display_name":"Affordable and clean energy","id":"https://metadata.un.org/sdg/7"}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"The":[0],"growing":[1],"computational":[2],"and":[3,39,47,51,110,128,135,160,164],"memory":[4,158],"demands":[5],"of":[6,15,36,72],"the":[7,13,68,150],"Key-Value":[8],"(KV)":[9],"cache":[10],"significantly":[11],"limit":[12],"ability":[14],"Large":[16],"Language":[17],"Models":[18],"(LLMs).":[19],"While":[20],"KV":[21,37],"merging":[22],"has":[23],"emerged":[24],"as":[25],"a":[26,44,60,106,112,121],"promising":[27],"solution,":[28],"existing":[29],"methods":[30],"that":[31,63,76,100,124,138],"rely":[32],"on":[33,146],"empirical":[34],"observations":[35],"asymmetry":[38,66],"gradient-based":[40],"Hessian":[41,103],"approximations":[42],"lack":[43],"theoretical":[45,61],"foundation":[46],"incur":[48],"suboptimal":[49],"compression":[50],"inference":[52],"overhead.":[53],"To":[54],"bridge":[55],"these":[56],"gaps,":[57],"we":[58,94],"establish":[59],"framework":[62],"characterizes":[64],"this":[65],"through":[67,105],"spectral":[69],"energy":[70],"distribution":[71],"projection":[73],"weights,":[74],"demonstrating":[75],"concentrated":[77],"spectra":[78,87],"in":[79,88,120],"Query/Key":[80],"weights":[81,90],"induce":[82],"feature":[83],"homogeneity,":[84],"whereas":[85],"dispersed":[86],"Value":[89],"preserve":[91],"heterogeneity.":[92],"Then,":[93],"introduce":[95],"KVSlimmer,":[96],"an":[97],"efficient":[98],"algorithm":[99],"captures":[101],"exact":[102,108],"information":[104],"mathematically":[107],"formulation,":[109],"derives":[111],"closed-form":[113],"solution":[114],"utilizing":[115],"only":[116],"forward-pass":[117],"variables,":[118],"resulting":[119],"gradient-free":[122],"approach":[123],"is":[125,167],"both":[126],"memory-":[127],"time-efficient.":[129],"Extensive":[130],"experiments":[131],"across":[132],"various":[133],"models":[134],"benchmarks":[136],"demonstrate":[137],"KVSlimmer":[139],"consistently":[140],"outperforms":[141],"SOTA":[142],"methods.":[143],"For":[144],"instance,":[145],"Llama3.1-8B-Instruct,":[147],"it":[148],"improves":[149],"LongBench":[151],"average":[152],"score":[153],"by":[154,162],"0.92":[155],"while":[156],"reducing":[157],"costs":[159],"latency":[161],"29%":[163],"28%,":[165],"respectively.Code":[166],"available":[168],"at":[169],"https://github.com/lianjunl13-sudo/KVSlimmer.":[170]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2026-03-04T00:00:00"}
