{"id":"https://openalex.org/W4416251440","doi":"https://doi.org/10.1109/ijcnn64981.2025.11228214","title":"Initial-Key Cache: An Efficient KV Cache Strategy Focusing on Initial and Key Tokens for LLMs","display_name":"Initial-Key Cache: An Efficient KV Cache Strategy Focusing on Initial and Key Tokens for LLMs","publication_year":2025,"publication_date":"2025-06-30","ids":{"openalex":"https://openalex.org/W4416251440","doi":"https://doi.org/10.1109/ijcnn64981.2025.11228214"},"language":null,"primary_location":{"id":"doi:10.1109/ijcnn64981.2025.11228214","is_oa":false,"landing_page_url":"https://doi.org/10.1109/ijcnn64981.2025.11228214","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 International Joint Conference on Neural Networks (IJCNN)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5007496299","display_name":"Zhongyi Tang","orcid":"https://orcid.org/0000-0003-3635-4911"},"institutions":[{"id":"https://openalex.org/I170215575","display_name":"National University of Defense Technology","ror":"https://ror.org/05d2yfz11","country_code":"CN","type":"education","lineage":["https://openalex.org/I170215575"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Zhongyi Tang","raw_affiliation_strings":["National University of Defense Technology,College of Computer Science and Technology,China"],"affiliations":[{"raw_affiliation_string":"National University of Defense Technology,College of Computer Science and Technology,China","institution_ids":["https://openalex.org/I170215575"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5047498670","display_name":"Zejiang He","orcid":null},"institutions":[{"id":"https://openalex.org/I170215575","display_name":"National University of Defense Technology","ror":"https://ror.org/05d2yfz11","country_code":"CN","type":"education","lineage":["https://openalex.org/I170215575"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Zejiang He","raw_affiliation_strings":["National University of Defense Technology,College of Computer Science and Technology,China"],"affiliations":[{"raw_affiliation_string":"National University of Defense Technology,College of Computer Science and Technology,China","institution_ids":["https://openalex.org/I170215575"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5037428152","display_name":"Junzhong Shen","orcid":"https://orcid.org/0000-0001-6233-6800"},"institutions":[{"id":"https://openalex.org/I170215575","display_name":"National University of Defense Technology","ror":"https://ror.org/05d2yfz11","country_code":"CN","type":"education","lineage":["https://openalex.org/I170215575"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Junzhong Shen","raw_affiliation_strings":["National University of Defense Technology,College of Computer Science and Technology,China"],"affiliations":[{"raw_affiliation_string":"National University of Defense Technology,College of Computer Science and Technology,China","institution_ids":["https://openalex.org/I170215575"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5016199067","display_name":"Yang Hu","orcid":"https://orcid.org/0000-0002-6255-6376"},"institutions":[{"id":"https://openalex.org/I170215575","display_name":"National University of Defense Technology","ror":"https://ror.org/05d2yfz11","country_code":"CN","type":"education","lineage":["https://openalex.org/I170215575"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yiyue Hu","raw_affiliation_strings":["National University of Defense Technology,College of Computer Science and Technology,China"],"affiliations":[{"raw_affiliation_string":"National University of Defense Technology,College of Computer Science and Technology,China","institution_ids":["https://openalex.org/I170215575"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5053957520","display_name":"Lilin Zhou","orcid":"https://orcid.org/0000-0003-0878-2791"},"institutions":[{"id":"https://openalex.org/I170215575","display_name":"National University of Defense Technology","ror":"https://ror.org/05d2yfz11","country_code":"CN","type":"education","lineage":["https://openalex.org/I170215575"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Luchen Zhou","raw_affiliation_strings":["National University of Defense Technology,College of Computer Science and Technology,China"],"affiliations":[{"raw_affiliation_string":"National University of Defense Technology,College of Computer Science and Technology,China","institution_ids":["https://openalex.org/I170215575"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5120499607","display_name":"Yongzhang Nie","orcid":null},"institutions":[{"id":"https://openalex.org/I170215575","display_name":"National University of Defense Technology","ror":"https://ror.org/05d2yfz11","country_code":"CN","type":"education","lineage":["https://openalex.org/I170215575"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yongzhang Nie","raw_affiliation_strings":["National University of Defense Technology,College of Computer Science and Technology,China"],"affiliations":[{"raw_affiliation_string":"National University of Defense Technology,College of Computer Science and Technology,China","institution_ids":["https://openalex.org/I170215575"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5070632746","display_name":"Yongwen Wang","orcid":"https://orcid.org/0009-0008-2514-2052"},"institutions":[{"id":"https://openalex.org/I170215575","display_name":"National University of Defense Technology","ror":"https://ror.org/05d2yfz11","country_code":"CN","type":"education","lineage":["https://openalex.org/I170215575"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yongwen Wang","raw_affiliation_strings":["National University of Defense Technology,College of Computer Science and Technology,China"],"affiliations":[{"raw_affiliation_string":"National University of Defense Technology,College of Computer Science and Technology,China","institution_ids":["https://openalex.org/I170215575"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":7,"corresponding_author_ids":["https://openalex.org/A5007496299"],"corresponding_institution_ids":["https://openalex.org/I170215575"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.18279585,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"8"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.25380000472068787,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.25380000472068787,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T14347","display_name":"Big Data and Digital Economy","score":0.22290000319480896,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.09989999979734421,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/cache","display_name":"Cache","score":0.8152999877929688},{"id":"https://openalex.org/keywords/bottleneck","display_name":"Bottleneck","score":0.6690000295639038},{"id":"https://openalex.org/keywords/inference","display_name":"Inference","score":0.6035000085830688},{"id":"https://openalex.org/keywords/key","display_name":"Key (lock)","score":0.3978999853134155},{"id":"https://openalex.org/keywords/computation","display_name":"Computation","score":0.36550000309944153},{"id":"https://openalex.org/keywords/cache-algorithms","display_name":"Cache algorithms","score":0.32710000872612}],"concepts":[{"id":"https://openalex.org/C115537543","wikidata":"https://www.wikidata.org/wiki/Q165596","display_name":"Cache","level":2,"score":0.8152999877929688},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7865999937057495},{"id":"https://openalex.org/C2780513914","wikidata":"https://www.wikidata.org/wiki/Q18210350","display_name":"Bottleneck","level":2,"score":0.6690000295639038},{"id":"https://openalex.org/C2776214188","wikidata":"https://www.wikidata.org/wiki/Q408386","display_name":"Inference","level":2,"score":0.6035000085830688},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.3978999853134155},{"id":"https://openalex.org/C45374587","wikidata":"https://www.wikidata.org/wiki/Q12525525","display_name":"Computation","level":2,"score":0.36550000309944153},{"id":"https://openalex.org/C38556500","wikidata":"https://www.wikidata.org/wiki/Q13404475","display_name":"Cache algorithms","level":4,"score":0.32710000872612},{"id":"https://openalex.org/C98045186","wikidata":"https://www.wikidata.org/wiki/Q205663","display_name":"Process (computing)","level":2,"score":0.32679998874664307},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.3052999973297119},{"id":"https://openalex.org/C2777530160","wikidata":"https://www.wikidata.org/wiki/Q41796","display_name":"Sentence","level":2,"score":0.3034999966621399},{"id":"https://openalex.org/C189783530","wikidata":"https://www.wikidata.org/wiki/Q352090","display_name":"CPU cache","level":3,"score":0.27149999141693115},{"id":"https://openalex.org/C166052673","wikidata":"https://www.wikidata.org/wiki/Q83021","display_name":"Empirical evidence","level":2,"score":0.26190000772476196}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/ijcnn64981.2025.11228214","is_oa":false,"landing_page_url":"https://doi.org/10.1109/ijcnn64981.2025.11228214","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 International Joint Conference on Neural Networks (IJCNN)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"},{"id":"https://openalex.org/F4320322843","display_name":"Natural Science Foundation of\u00a0Hunan Province","ror":null}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":12,"referenced_works":["https://openalex.org/W1632114991","https://openalex.org/W2890894339","https://openalex.org/W2933138175","https://openalex.org/W2984100107","https://openalex.org/W2998617917","https://openalex.org/W4385569780","https://openalex.org/W4387321091","https://openalex.org/W4387968057","https://openalex.org/W4401176373","https://openalex.org/W4401211590","https://openalex.org/W4402671766","https://openalex.org/W4416035984"],"related_works":[],"abstract_inverted_index":{"Large":[0],"language":[1],"models":[2],"(LLMs)":[3],"have":[4,11],"developed":[5],"rapidly":[6],"in":[7,15,28,152,202],"recent":[8],"years":[9],"and":[10,38,105,156,228],"demonstrated":[12],"excellent":[13],"performance":[14,119,233],"various":[16],"application":[17],"fields.":[18],"Despite":[19],"their":[20,36],"outstanding":[21],"performance,":[22],"LLMs":[23,58],"also":[24,93,206],"introduce":[25],"significant":[26,151],"challenges":[27],"the":[29,43,47,55,69,76,81,101,118,121,129,160,167,170,176,196,199,209,232,235,243,255],"practical":[30],"inference":[31,56,104],"process,":[32],"mainly":[33],"because":[34],"of":[35,46,57,103,120,169,198,211,218,234],"computational":[37],"memory-intensive":[39],"characteristics.":[40],"Due":[41],"to":[42,74,80,164,230],"autoregressive":[44],"nature":[45],"attention":[48,130,135,161],"mechanism,":[49],"KV":[50,77,187,247],"caching":[51],"can":[52],"effectively":[53,194],"accelerate":[54],"by":[59],"substituting":[60],"quadratic-complexity":[61],"computation":[62],"with":[63],"linear-complexity":[64],"memory":[65,249],"accesses.":[66],"However,":[67],"during":[68],"calculation,":[70],"it":[71],"is":[72],"necessary":[73],"transfer":[75],"cache":[78,188,248],"value":[79],"computing":[82],"unit,":[83],"which":[84],"not":[85,149,192,253],"only":[86,193],"requires":[87],"a":[88,114,216],"larger":[89],"storage":[90,98,111],"capacity":[91],"but":[92,205],"places":[94],"higher":[95],"demands":[96],"on":[97,175,220],"bandwidth.":[99],"In":[100,123],"process":[102],"text":[106],"generation":[107],"for":[108],"long":[109,203],"texts,":[110],"has":[112],"become":[113],"bottleneck":[115],"that":[116,133,159,191,242],"limits":[117],"inference.":[122],"this":[124],"paper,":[125],"we":[126,181],"first":[127],"noticed":[128],"sink":[131,162,200],"phenomenon":[132,201],"high":[134],"scores":[136],"are":[137,148],"allocated":[138],"towards":[139],"initial":[140],"tokens":[141],"as":[142,166],"\"attention":[143],"sink\"":[144],"even":[145],"if":[146],"they":[147],"semantically":[150],"short":[153],"input":[154,171],"sentences":[155],"further":[157],"observed":[158],"tends":[163],"diminish":[165],"length":[168],"sentence":[172],"increases.":[173],"Based":[174],"above":[177],"preliminary":[178],"empirical":[179],"results,":[180],"proposed":[182],"Initial-key":[183],"Cache,":[184],"an":[185],"efficient":[186],"selection":[189],"algorithm":[190],"addresses":[195],"disappearance":[197],"inputs":[204],"comprehensively":[207],"considers":[208],"importance":[210],"middle-sentence":[212],"tokens.":[213],"We":[214],"conducted":[215],"series":[217],"experiments":[219],"four":[221],"baseline":[222],"models,":[223],"namely":[224],"LLaMA,":[225],"Qwen,":[226],"Pythia,":[227],"OPT,":[229],"assess":[231],"Initial-Key":[236,244],"Cache.":[237],"The":[238],"experiment":[239],"results":[240],"indicate":[241],"Cache":[245],"saves":[246],"usage,":[250],"while":[251],"almost":[252],"losing":[254],"model\u2019s":[256],"accuracy.":[257]},"counts_by_year":[],"updated_date":"2026-04-09T08:11:56.329763","created_date":"2025-11-14T00:00:00"}
