{"id":"https://openalex.org/W4416035230","doi":"https://doi.org/10.18653/v1/2025.emnlp-main.1732","title":"Lookahead Q-Cache: Achieving More Consistent KV Cache Eviction via Pseudo Query","display_name":"Lookahead Q-Cache: Achieving More Consistent KV Cache Eviction via Pseudo Query","publication_year":2025,"publication_date":"2025-01-01","ids":{"openalex":"https://openalex.org/W4416035230","doi":"https://doi.org/10.18653/v1/2025.emnlp-main.1732"},"language":null,"primary_location":{"id":"doi:10.18653/v1/2025.emnlp-main.1732","is_oa":true,"landing_page_url":"https://doi.org/10.18653/v1/2025.emnlp-main.1732","pdf_url":"https://aclanthology.org/2025.emnlp-main.1732.pdf","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2025 Conference on Empirical Methods in Natural Language Processing","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://aclanthology.org/2025.emnlp-main.1732.pdf","any_repository_has_fulltext":null},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5055492705","display_name":"Yixuan Wang","orcid":"https://orcid.org/0000-0001-5491-1422"},"institutions":[{"id":"https://openalex.org/I204983213","display_name":"Harbin Institute of Technology","ror":"https://ror.org/01yqg2h08","country_code":"CN","type":"education","lineage":["https://openalex.org/I204983213"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Yixuan Wang","raw_affiliation_strings":["Research Center for Social Computing and Interactive Robotics , Harbin Institute of Technology , China"],"affiliations":[{"raw_affiliation_string":"Research Center for Social Computing and Interactive Robotics , Harbin Institute of Technology , China","institution_ids":["https://openalex.org/I204983213"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5119938219","display_name":"Shiyu Ji","orcid":null},"institutions":[{"id":"https://openalex.org/I204983213","display_name":"Harbin Institute of Technology","ror":"https://ror.org/01yqg2h08","country_code":"CN","type":"education","lineage":["https://openalex.org/I204983213"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Shiyu Ji","raw_affiliation_strings":["Research Center for Social Computing and Interactive Robotics , Harbin Institute of Technology , China"],"affiliations":[{"raw_affiliation_string":"Research Center for Social Computing and Interactive Robotics , Harbin Institute of Technology , China","institution_ids":["https://openalex.org/I204983213"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100365627","display_name":"Yijun Liu","orcid":"https://orcid.org/0000-0002-4228-4684"},"institutions":[{"id":"https://openalex.org/I204983213","display_name":"Harbin Institute of Technology","ror":"https://ror.org/01yqg2h08","country_code":"CN","type":"education","lineage":["https://openalex.org/I204983213"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yijun Liu","raw_affiliation_strings":["Research Center for Social Computing and Interactive Robotics , Harbin Institute of Technology , China"],"affiliations":[{"raw_affiliation_string":"Research Center for Social Computing and Interactive Robotics , Harbin Institute of Technology , China","institution_ids":["https://openalex.org/I204983213"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101853550","display_name":"Yue Xu","orcid":"https://orcid.org/0000-0003-2518-7273"},"institutions":[{"id":"https://openalex.org/I204983213","display_name":"Harbin Institute of Technology","ror":"https://ror.org/01yqg2h08","country_code":"CN","type":"education","lineage":["https://openalex.org/I204983213"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yuzhuang Xu","raw_affiliation_strings":["Research Center for Social Computing and Interactive Robotics , Harbin Institute of Technology , China"],"affiliations":[{"raw_affiliation_string":"Research Center for Social Computing and Interactive Robotics , Harbin Institute of Technology , China","institution_ids":["https://openalex.org/I204983213"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100694150","display_name":"Yang Xu","orcid":"https://orcid.org/0000-0001-9976-4403"},"institutions":[{"id":"https://openalex.org/I204983213","display_name":"Harbin Institute of Technology","ror":"https://ror.org/01yqg2h08","country_code":"CN","type":"education","lineage":["https://openalex.org/I204983213"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yang Xu","raw_affiliation_strings":["Research Center for Social Computing and Interactive Robotics , Harbin Institute of Technology , China"],"affiliations":[{"raw_affiliation_string":"Research Center for Social Computing and Interactive Robotics , Harbin Institute of Technology , China","institution_ids":["https://openalex.org/I204983213"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101903539","display_name":"Qingling Zhu","orcid":"https://orcid.org/0000-0002-0925-9903"},"institutions":[{"id":"https://openalex.org/I204983213","display_name":"Harbin Institute of Technology","ror":"https://ror.org/01yqg2h08","country_code":"CN","type":"education","lineage":["https://openalex.org/I204983213"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Qingfu Zhu","raw_affiliation_strings":["Research Center for Social Computing and Interactive Robotics , Harbin Institute of Technology , China"],"affiliations":[{"raw_affiliation_string":"Research Center for Social Computing and Interactive Robotics , Harbin Institute of Technology , China","institution_ids":["https://openalex.org/I204983213"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5119938220","display_name":"Wanxiang Che","orcid":null},"institutions":[{"id":"https://openalex.org/I204983213","display_name":"Harbin Institute of Technology","ror":"https://ror.org/01yqg2h08","country_code":"CN","type":"education","lineage":["https://openalex.org/I204983213"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Wanxiang Che","raw_affiliation_strings":["Research Center for Social Computing and Interactive Robotics , Harbin Institute of Technology , China"],"affiliations":[{"raw_affiliation_string":"Research Center for Social Computing and Interactive Robotics , Harbin Institute of Technology , China","institution_ids":["https://openalex.org/I204983213"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":7,"corresponding_author_ids":["https://openalex.org/A5055492705"],"corresponding_institution_ids":["https://openalex.org/I204983213"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":true,"cited_by_count":0,"citation_normalized_percentile":{"value":0.37335798,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"34146","last_page":"34162"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10772","display_name":"Distributed systems and fault tolerance","score":0.12479999661445618,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10772","display_name":"Distributed systems and fault tolerance","score":0.12479999661445618,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12326","display_name":"Network Packet Processing and Optimization","score":0.10610000044107437,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.08829999715089798,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/cache","display_name":"Cache","score":0.3357999920845032},{"id":"https://openalex.org/keywords/set","display_name":"Set (abstract data type)","score":0.3321000039577484},{"id":"https://openalex.org/keywords/query-optimization","display_name":"Query optimization","score":0.30300000309944153},{"id":"https://openalex.org/keywords/sequence","display_name":"Sequence (biology)","score":0.2842000126838684},{"id":"https://openalex.org/keywords/noise","display_name":"Noise (video)","score":0.2784999907016754}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6680999994277954},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.42579999566078186},{"id":"https://openalex.org/C115537543","wikidata":"https://www.wikidata.org/wiki/Q165596","display_name":"Cache","level":2,"score":0.3357999920845032},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.3321000039577484},{"id":"https://openalex.org/C157692150","wikidata":"https://www.wikidata.org/wiki/Q2919848","display_name":"Query optimization","level":2,"score":0.30300000309944153},{"id":"https://openalex.org/C80444323","wikidata":"https://www.wikidata.org/wiki/Q2878974","display_name":"Theoretical computer science","level":1,"score":0.289000004529953},{"id":"https://openalex.org/C2778112365","wikidata":"https://www.wikidata.org/wiki/Q3511065","display_name":"Sequence (biology)","level":2,"score":0.2842000126838684},{"id":"https://openalex.org/C99498987","wikidata":"https://www.wikidata.org/wiki/Q2210247","display_name":"Noise (video)","level":3,"score":0.2784999907016754},{"id":"https://openalex.org/C116834253","wikidata":"https://www.wikidata.org/wiki/Q2039217","display_name":"Identification (biology)","level":2,"score":0.25839999318122864},{"id":"https://openalex.org/C2778049214","wikidata":"https://www.wikidata.org/wiki/Q7512234","display_name":"Sigma","level":2,"score":0.2526000142097473},{"id":"https://openalex.org/C187455244","wikidata":"https://www.wikidata.org/wiki/Q942353","display_name":"Boolean function","level":2,"score":0.25099998712539673}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.18653/v1/2025.emnlp-main.1732","is_oa":true,"landing_page_url":"https://doi.org/10.18653/v1/2025.emnlp-main.1732","pdf_url":"https://aclanthology.org/2025.emnlp-main.1732.pdf","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2025 Conference on Empirical Methods in Natural Language Processing","raw_type":"proceedings-article"}],"best_oa_location":{"id":"doi:10.18653/v1/2025.emnlp-main.1732","is_oa":true,"landing_page_url":"https://doi.org/10.18653/v1/2025.emnlp-main.1732","pdf_url":"https://aclanthology.org/2025.emnlp-main.1732.pdf","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2025 Conference on Empirical Methods in Natural Language Processing","raw_type":"proceedings-article"},"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G2087396116","display_name":null,"funder_award_id":"China","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G2504063345","display_name":null,"funder_award_id":"6244160","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G2565984771","display_name":null,"funder_award_id":"62236004","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G3317480652","display_name":null,"funder_award_id":"Science","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G3596954764","display_name":null,"funder_award_id":"62476073","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G4035621038","display_name":null,"funder_award_id":"62441603","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G5994120800","display_name":null,"funder_award_id":"Natural","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"}],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4416035230.pdf","grobid_xml":"https://content.openalex.org/works/W4416035230.grobid-xml"},"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Large":[0],"language":[1],"models":[2],"(LLMs)":[3],"rely":[4],"on":[5,104,125],"keyvalue":[6],"cache":[7,19,34,96,129],"(KV":[8],"cache)":[9],"to":[10,71,134,142],"accelerate":[11],"decoding":[12],"by":[13],"reducing":[14],"redundant":[15],"computations.However,":[16],"the":[17,74,83],"KV":[18,33,95],"memory":[20,52],"usage":[21],"grows":[22],"substantially":[23],"with":[24,45,99],"longer":[25],"text":[26],"sequences,":[27],"posing":[28],"challenges":[29],"for":[30,86],"efficient":[31],"deployment.Existing":[32],"eviction":[35,63,97],"methods":[36,114],"prune":[37],"tokens":[38],"using":[39,78],"prefilling-stage":[40],"attention":[41],"scores,":[42],"causing":[43],"inconsistency":[44],"actual":[46],"inference":[47,101],"queries,":[48],"especially":[49],"under":[50,127],"tight":[51],"budgets.In":[53],"this":[54],"paper,":[55],"we":[56],"propose":[57],"Lookahead":[58],"Q-Cache":[59],"(LAQ),":[60],"a":[61,120],"novel":[62],"framework":[64],"that":[65,110],"generates":[66],"lowcost":[67],"pseudo":[68],"lookahead":[69,80],"queries":[70,81],"better":[72],"approximate":[73],"true":[75],"decoding-stage":[76],"queries.By":[77],"these":[79],"as":[82],"observation":[84],"window":[85],"importance":[87],"estimation,":[88],"LAQ":[89,111,131],"achieves":[90],"more":[91],"consistent":[92],"and":[93,106,137],"accurate":[94],"aligned":[98],"real":[100],"scenarios.Experimental":[102],"results":[103],"LongBench":[105,126],"Needlein-a-Haystack":[107],"benchmarks":[108],"show":[109],"outperforms":[112],"existing":[113,135],"across":[115],"various":[116],"budget":[117],"levels,":[118],"achieving":[119],"1":[121],"4":[122],"point":[123],"improvement":[124],"limited":[128],"budget.Moreover,":[130],"is":[132],"complementary":[133],"approaches":[136],"can":[138],"be":[139],"flexibly":[140],"combined":[141],"yield":[143],"further":[144],"improvements.":[145]},"counts_by_year":[],"updated_date":"2026-04-18T07:56:08.524223","created_date":"2025-11-08T00:00:00"}
