{"id":"https://openalex.org/W4401176373","doi":"https://doi.org/10.1145/3651890.3672274","title":"CacheGen: KV Cache Compression and Streaming for Fast Large Language Model Serving","display_name":"CacheGen: KV Cache Compression and Streaming for Fast Large Language Model Serving","publication_year":2024,"publication_date":"2024-07-31","ids":{"openalex":"https://openalex.org/W4401176373","doi":"https://doi.org/10.1145/3651890.3672274"},"language":"en","primary_location":{"id":"doi:10.1145/3651890.3672274","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3651890.3672274","pdf_url":null,"source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the ACM SIGCOMM 2024 Conference","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://doi.org/10.1145/3651890.3672274","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5100350539","display_name":"Yuhan Liu","orcid":"https://orcid.org/0009-0002-5957-5071"},"institutions":[{"id":"https://openalex.org/I40347166","display_name":"University of Chicago","ror":"https://ror.org/024mw5h28","country_code":"US","type":"education","lineage":["https://openalex.org/I40347166"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Yuhan Liu","raw_affiliation_strings":["University of Chicago, Chicago, USA"],"affiliations":[{"raw_affiliation_string":"University of Chicago, Chicago, USA","institution_ids":["https://openalex.org/I40347166"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5003455316","display_name":"Hanchen Li","orcid":"https://orcid.org/0009-0005-9980-028X"},"institutions":[{"id":"https://openalex.org/I40347166","display_name":"University of Chicago","ror":"https://ror.org/024mw5h28","country_code":"US","type":"education","lineage":["https://openalex.org/I40347166"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Hanchen Li","raw_affiliation_strings":["University of Chicago, Chicago, USA"],"affiliations":[{"raw_affiliation_string":"University of Chicago, Chicago, USA","institution_ids":["https://openalex.org/I40347166"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5004850046","display_name":"Yihua Cheng","orcid":"https://orcid.org/0009-0006-3924-6886"},"institutions":[{"id":"https://openalex.org/I40347166","display_name":"University of Chicago","ror":"https://ror.org/024mw5h28","country_code":"US","type":"education","lineage":["https://openalex.org/I40347166"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Yihua Cheng","raw_affiliation_strings":["University of Chicago, Chicago, USA"],"affiliations":[{"raw_affiliation_string":"University of Chicago, Chicago, USA","institution_ids":["https://openalex.org/I40347166"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5022254723","display_name":"Siddhant Ray","orcid":"https://orcid.org/0000-0003-0265-2144"},"institutions":[{"id":"https://openalex.org/I40347166","display_name":"University of Chicago","ror":"https://ror.org/024mw5h28","country_code":"US","type":"education","lineage":["https://openalex.org/I40347166"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Siddhant Ray","raw_affiliation_strings":["University of Chicago, Chicago, United States of America"],"affiliations":[{"raw_affiliation_string":"University of Chicago, Chicago, United States of America","institution_ids":["https://openalex.org/I40347166"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101513128","display_name":"Yuyang Huang","orcid":"https://orcid.org/0000-0002-8822-3115"},"institutions":[{"id":"https://openalex.org/I40347166","display_name":"University of Chicago","ror":"https://ror.org/024mw5h28","country_code":"US","type":"education","lineage":["https://openalex.org/I40347166"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Yuyang Huang","raw_affiliation_strings":["University of Chicago, Chicago, United States of America"],"affiliations":[{"raw_affiliation_string":"University of Chicago, Chicago, United States of America","institution_ids":["https://openalex.org/I40347166"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5103250112","display_name":"Qizheng Zhang","orcid":"https://orcid.org/0009-0009-3208-4601"},"institutions":[{"id":"https://openalex.org/I1743320","display_name":"Palo Alto University","ror":"https://ror.org/04f812k67","country_code":"US","type":"education","lineage":["https://openalex.org/I1743320"]},{"id":"https://openalex.org/I97018004","display_name":"Stanford University","ror":"https://ror.org/00f54p054","country_code":"US","type":"education","lineage":["https://openalex.org/I97018004"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Qizheng Zhang","raw_affiliation_strings":["Stanford University, Palo Alto, United States of America"],"affiliations":[{"raw_affiliation_string":"Stanford University, Palo Alto, United States of America","institution_ids":["https://openalex.org/I1743320","https://openalex.org/I97018004"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5036272233","display_name":"Kuntai Du","orcid":"https://orcid.org/0000-0002-3964-4079"},"institutions":[{"id":"https://openalex.org/I40347166","display_name":"University of Chicago","ror":"https://ror.org/024mw5h28","country_code":"US","type":"education","lineage":["https://openalex.org/I40347166"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Kuntai Du","raw_affiliation_strings":["University of Chicago, Chicago, USA"],"affiliations":[{"raw_affiliation_string":"University of Chicago, Chicago, USA","institution_ids":["https://openalex.org/I40347166"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5058454592","display_name":"Jiayi Yao","orcid":"https://orcid.org/0000-0002-8588-4356"},"institutions":[{"id":"https://openalex.org/I4210116924","display_name":"Chinese University of Hong Kong, Shenzhen","ror":"https://ror.org/02d5ks197","country_code":"CN","type":"education","lineage":["https://openalex.org/I177725633","https://openalex.org/I180726961","https://openalex.org/I4210116924"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jiayi Yao","raw_affiliation_strings":["The Chinese University of Hong Kong, Shenzhen, Shenzhen, China"],"affiliations":[{"raw_affiliation_string":"The Chinese University of Hong Kong, Shenzhen, Shenzhen, China","institution_ids":["https://openalex.org/I4210116924"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5061448314","display_name":"Shan Lu","orcid":"https://orcid.org/0000-0002-0757-4600"},"institutions":[{"id":"https://openalex.org/I1290206253","display_name":"Microsoft (United States)","ror":"https://ror.org/00d0nc645","country_code":"US","type":"company","lineage":["https://openalex.org/I1290206253"]},{"id":"https://openalex.org/I58610484","display_name":"Seattle University","ror":"https://ror.org/02jqc0m91","country_code":"US","type":"education","lineage":["https://openalex.org/I58610484"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Shan Lu","raw_affiliation_strings":["Microsoft Research, Seattle, United States of America"],"affiliations":[{"raw_affiliation_string":"Microsoft Research, Seattle, United States of America","institution_ids":["https://openalex.org/I1290206253","https://openalex.org/I58610484"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5031071237","display_name":"Ganesh Ananthanarayanan","orcid":"https://orcid.org/0000-0002-7479-1664"},"institutions":[{"id":"https://openalex.org/I1290206253","display_name":"Microsoft (United States)","ror":"https://ror.org/00d0nc645","country_code":"US","type":"company","lineage":["https://openalex.org/I1290206253"]},{"id":"https://openalex.org/I58610484","display_name":"Seattle University","ror":"https://ror.org/02jqc0m91","country_code":"US","type":"education","lineage":["https://openalex.org/I58610484"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Ganesh Ananthanarayanan","raw_affiliation_strings":["Microsoft, Seattle, United States of America"],"affiliations":[{"raw_affiliation_string":"Microsoft, Seattle, United States of America","institution_ids":["https://openalex.org/I1290206253","https://openalex.org/I58610484"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5001371942","display_name":"Michael Maire","orcid":"https://orcid.org/0000-0002-9778-6673"},"institutions":[{"id":"https://openalex.org/I40347166","display_name":"University of Chicago","ror":"https://ror.org/024mw5h28","country_code":"US","type":"education","lineage":["https://openalex.org/I40347166"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Michael Maire","raw_affiliation_strings":["The University of Chicago, Chicago, United States of America"],"affiliations":[{"raw_affiliation_string":"The University of Chicago, Chicago, United States of America","institution_ids":["https://openalex.org/I40347166"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5080833704","display_name":"Henry Hoffmann","orcid":"https://orcid.org/0000-0003-0816-8150"},"institutions":[{"id":"https://openalex.org/I40347166","display_name":"University of Chicago","ror":"https://ror.org/024mw5h28","country_code":"US","type":"education","lineage":["https://openalex.org/I40347166"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Henry Hoffmann","raw_affiliation_strings":["University of Chicago, Chicago, United States of America"],"affiliations":[{"raw_affiliation_string":"University of Chicago, Chicago, United States of America","institution_ids":["https://openalex.org/I40347166"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5063151917","display_name":"Ari Holtzman","orcid":null},"institutions":[{"id":"https://openalex.org/I40347166","display_name":"University of Chicago","ror":"https://ror.org/024mw5h28","country_code":"US","type":"education","lineage":["https://openalex.org/I40347166"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Ari Holtzman","raw_affiliation_strings":["Meta, University of Chicago, Chicago, United States of America"],"affiliations":[{"raw_affiliation_string":"Meta, University of Chicago, Chicago, United States of America","institution_ids":["https://openalex.org/I40347166"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5103258769","display_name":"Junchen Jiang","orcid":"https://orcid.org/0000-0002-6877-1683"},"institutions":[{"id":"https://openalex.org/I40347166","display_name":"University of Chicago","ror":"https://ror.org/024mw5h28","country_code":"US","type":"education","lineage":["https://openalex.org/I40347166"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Junchen Jiang","raw_affiliation_strings":["University of Chicago, Chicago, United States of America"],"affiliations":[{"raw_affiliation_string":"University of Chicago, Chicago, United States of America","institution_ids":["https://openalex.org/I40347166"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":14,"corresponding_author_ids":["https://openalex.org/A5100350539"],"corresponding_institution_ids":["https://openalex.org/I40347166"],"apc_list":null,"apc_paid":null,"fwci":19.9168,"has_fulltext":false,"cited_by_count":59,"citation_normalized_percentile":{"value":0.99514536,"is_in_top_1_percent":true,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":90,"max":100},"biblio":{"volume":null,"issue":null,"first_page":"38","last_page":"56"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11269","display_name":"Algorithms and Data Compression","score":0.9979000091552734,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11269","display_name":"Algorithms and Data Compression","score":0.9979000091552734,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12326","display_name":"Network Packet Processing and Optimization","score":0.9927999973297119,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9801999926567078,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7680065631866455},{"id":"https://openalex.org/keywords/cache","display_name":"Cache","score":0.7498752474784851},{"id":"https://openalex.org/keywords/compression","display_name":"Compression (physics)","score":0.604250431060791},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.3478120267391205},{"id":"https://openalex.org/keywords/materials-science","display_name":"Materials science","score":0.07414957880973816}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7680065631866455},{"id":"https://openalex.org/C115537543","wikidata":"https://www.wikidata.org/wiki/Q165596","display_name":"Cache","level":2,"score":0.7498752474784851},{"id":"https://openalex.org/C180016635","wikidata":"https://www.wikidata.org/wiki/Q2712821","display_name":"Compression (physics)","level":2,"score":0.604250431060791},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.3478120267391205},{"id":"https://openalex.org/C192562407","wikidata":"https://www.wikidata.org/wiki/Q228736","display_name":"Materials science","level":0,"score":0.07414957880973816},{"id":"https://openalex.org/C159985019","wikidata":"https://www.wikidata.org/wiki/Q181790","display_name":"Composite material","level":1,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1145/3651890.3672274","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3651890.3672274","pdf_url":null,"source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the ACM SIGCOMM 2024 Conference","raw_type":"proceedings-article"},{"id":"pmh:oai:uchicago.tind.io:13374","is_oa":true,"landing_page_url":"http://knowledge.uchicago.edu/record/13374","pdf_url":null,"source":{"id":"https://openalex.org/S4306402460","display_name":"Knowledge@UChicago (University of Chicago)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I40347166","host_organization_name":"University of Chicago","host_organization_lineage":["https://openalex.org/I40347166"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by-nc-nd","license_id":"https://openalex.org/licenses/cc-by-nc-nd","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"http://knowledge.uchicago.edu/record/13374","raw_type":"Text"}],"best_oa_location":{"id":"doi:10.1145/3651890.3672274","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3651890.3672274","pdf_url":null,"source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the ACM SIGCOMM 2024 Conference","raw_type":"proceedings-article"},"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G1412639289","display_name":"CNS Core: Medium: Accurate Anytime Learning for Energy andTimeliness in Software Systems","funder_award_id":"1956180","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"},{"id":"https://openalex.org/G1692647576","display_name":null,"funder_award_id":"CNS-1901466","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"},{"id":"https://openalex.org/G3118918149","display_name":null,"funder_award_id":"CCF-2119184","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"},{"id":"https://openalex.org/G337251858","display_name":"CNS Core:Medium:Systems Challenges in Scaling Distributed Intelligent Applications","funder_award_id":"1901466","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"},{"id":"https://openalex.org/G3620354760","display_name":null,"funder_award_id":"NS-2313190","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"},{"id":"https://openalex.org/G3736266085","display_name":"CSR: Medium: Improving the Interface between Machine Learning and Software Systems","funder_award_id":"2313190","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"},{"id":"https://openalex.org/G4190001885","display_name":"CNS Core: Small: Closing the Reality Gap for Learning-Augmented Network Systems","funder_award_id":"2131826","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"},{"id":"https://openalex.org/G5064054989","display_name":"CAREER: Enabling Perception-Driven Optimization for Online Videos","funder_award_id":"2146496","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"},{"id":"https://openalex.org/G7593491178","display_name":null,"funder_award_id":"2119184","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"},{"id":"https://openalex.org/G8347821259","display_name":null,"funder_award_id":"CNS-1956180","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"},{"id":"https://openalex.org/G848032724","display_name":null,"funder_award_id":"Science","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"}],"funders":[{"id":"https://openalex.org/F4320306076","display_name":"National Science Foundation","ror":"https://ror.org/021nxhr62"},{"id":"https://openalex.org/F4320309626","display_name":"University of Chicago","ror":"https://ror.org/024mw5h28"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":39,"referenced_works":["https://openalex.org/W1494910745","https://openalex.org/W1975570633","https://openalex.org/W1993482030","https://openalex.org/W2129652681","https://openalex.org/W2167359416","https://openalex.org/W2734941459","https://openalex.org/W2806085960","https://openalex.org/W2912817604","https://openalex.org/W2962985038","https://openalex.org/W2963339397","https://openalex.org/W2965631471","https://openalex.org/W2970139579","https://openalex.org/W3015468748","https://openalex.org/W3027879771","https://openalex.org/W3081168214","https://openalex.org/W3099700870","https://openalex.org/W3105238007","https://openalex.org/W3121694563","https://openalex.org/W3131922516","https://openalex.org/W3156359583","https://openalex.org/W3156789018","https://openalex.org/W4213354024","https://openalex.org/W4241554665","https://openalex.org/W4287704453","https://openalex.org/W4313547549","https://openalex.org/W4319163914","https://openalex.org/W4321636575","https://openalex.org/W4361216835","https://openalex.org/W4362559429","https://openalex.org/W4372260356","https://openalex.org/W4372267133","https://openalex.org/W4385570688","https://openalex.org/W4386395487","https://openalex.org/W4387321091","https://openalex.org/W4387561453","https://openalex.org/W4388644518","https://openalex.org/W4388778348","https://openalex.org/W4389519226","https://openalex.org/W4392904185"],"related_works":["https://openalex.org/W4391375266","https://openalex.org/W2748952813","https://openalex.org/W2390279801","https://openalex.org/W2358668433","https://openalex.org/W4396701345","https://openalex.org/W2376932109","https://openalex.org/W2001405890","https://openalex.org/W4396696052","https://openalex.org/W2382290278","https://openalex.org/W4395014643"],"abstract_inverted_index":{"As":[0],"large":[1,64],"language":[2],"models":[3],"(LLMs)":[4],"take":[5],"on":[6],"complex":[7],"tasks,":[8],"their":[9],"inputs":[10],"are":[11],"supplemented":[12],"with":[13],"longer":[14],"contexts":[15,23],"that":[16],"incorporate":[17],"domain":[18],"knowledge.":[19],"Yet":[20],"using":[21],"long":[22],"is":[24,35],"challenging":[25],"as":[26],"nothing":[27],"can":[28,44,69],"be":[29,45],"generated":[30],"until":[31],"the":[32,38,41,49,59,67],"whole":[33],"context":[34,54],"processed":[36],"by":[37,47],"LLM.":[39],"While":[40],"context-processing":[42],"delay":[43],"reduced":[46],"reusing":[48],"KV":[50,60],"cache":[51],"of":[52],"a":[53],"across":[55],"different":[56],"inputs,":[57],"fetching":[58],"cache,":[61],"which":[62],"contains":[63],"tensors,":[65],"over":[66],"network":[68,73],"cause":[70],"high":[71],"extra":[72],"delays.":[74]},"counts_by_year":[{"year":2026,"cited_by_count":15},{"year":2025,"cited_by_count":43},{"year":2024,"cited_by_count":1}],"updated_date":"2026-04-15T08:11:43.952461","created_date":"2025-10-10T00:00:00"}
