{"id":"https://openalex.org/W4408848702","doi":"https://doi.org/10.1145/3689031.3696098","title":"CacheBlend: Fast Large Language Model Serving for RAG with Cached Knowledge Fusion","display_name":"CacheBlend: Fast Large Language Model Serving for RAG with Cached Knowledge Fusion","publication_year":2025,"publication_date":"2025-03-26","ids":{"openalex":"https://openalex.org/W4408848702","doi":"https://doi.org/10.1145/3689031.3696098"},"language":"en","primary_location":{"id":"doi:10.1145/3689031.3696098","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3689031.3696098","pdf_url":null,"source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the Twentieth European Conference on Computer Systems","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://doi.org/10.1145/3689031.3696098","any_repository_has_fulltext":null},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5058454592","display_name":"Jiayi Yao","orcid":"https://orcid.org/0000-0002-8588-4356"},"institutions":[{"id":"https://openalex.org/I40347166","display_name":"University of Chicago","ror":"https://ror.org/024mw5h28","country_code":"US","type":"education","lineage":["https://openalex.org/I40347166"]},{"id":"https://openalex.org/I889458895","display_name":"University of Hong Kong","ror":"https://ror.org/02zhqgq86","country_code":"HK","type":"education","lineage":["https://openalex.org/I889458895"]}],"countries":["HK","US"],"is_corresponding":true,"raw_author_name":"Jiayi Yao","raw_affiliation_strings":["University of Chicago/CUHK Shenzhen"],"raw_orcid":"https://orcid.org/0000-0002-8588-4356","affiliations":[{"raw_affiliation_string":"University of Chicago/CUHK Shenzhen","institution_ids":["https://openalex.org/I40347166","https://openalex.org/I889458895"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5003455316","display_name":"Hanchen Li","orcid":"https://orcid.org/0009-0005-9980-028X"},"institutions":[{"id":"https://openalex.org/I40347166","display_name":"University of Chicago","ror":"https://ror.org/024mw5h28","country_code":"US","type":"education","lineage":["https://openalex.org/I40347166"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Hanchen Li","raw_affiliation_strings":["University of Chicago"],"raw_orcid":"https://orcid.org/0009-0005-9980-028X","affiliations":[{"raw_affiliation_string":"University of Chicago","institution_ids":["https://openalex.org/I40347166"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100350539","display_name":"Yuhan Liu","orcid":"https://orcid.org/0009-0002-5957-5071"},"institutions":[{"id":"https://openalex.org/I40347166","display_name":"University of Chicago","ror":"https://ror.org/024mw5h28","country_code":"US","type":"education","lineage":["https://openalex.org/I40347166"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Yuhan Liu","raw_affiliation_strings":["University of Chicago"],"raw_orcid":"https://orcid.org/0009-0002-5957-5071","affiliations":[{"raw_affiliation_string":"University of Chicago","institution_ids":["https://openalex.org/I40347166"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5022254723","display_name":"Siddhant Ray","orcid":"https://orcid.org/0000-0003-0265-2144"},"institutions":[{"id":"https://openalex.org/I40347166","display_name":"University of Chicago","ror":"https://ror.org/024mw5h28","country_code":"US","type":"education","lineage":["https://openalex.org/I40347166"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Siddhant Ray","raw_affiliation_strings":["University of Chicago"],"raw_orcid":"https://orcid.org/0000-0003-0265-2144","affiliations":[{"raw_affiliation_string":"University of Chicago","institution_ids":["https://openalex.org/I40347166"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5004850046","display_name":"Yihua Cheng","orcid":"https://orcid.org/0009-0006-3924-6886"},"institutions":[{"id":"https://openalex.org/I40347166","display_name":"University of Chicago","ror":"https://ror.org/024mw5h28","country_code":"US","type":"education","lineage":["https://openalex.org/I40347166"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Yihua Cheng","raw_affiliation_strings":["University of Chicago"],"raw_orcid":"https://orcid.org/0009-0006-3924-6886","affiliations":[{"raw_affiliation_string":"University of Chicago","institution_ids":["https://openalex.org/I40347166"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5103250112","display_name":"Qizheng Zhang","orcid":"https://orcid.org/0009-0009-3208-4601"},"institutions":[{"id":"https://openalex.org/I97018004","display_name":"Stanford University","ror":"https://ror.org/00f54p054","country_code":"US","type":"education","lineage":["https://openalex.org/I97018004"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Qizheng Zhang","raw_affiliation_strings":["Stanford University"],"raw_orcid":"https://orcid.org/0009-0009-3208-4601","affiliations":[{"raw_affiliation_string":"Stanford University","institution_ids":["https://openalex.org/I97018004"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5036272233","display_name":"Kuntai Du","orcid":"https://orcid.org/0000-0002-3964-4079"},"institutions":[{"id":"https://openalex.org/I40347166","display_name":"University of Chicago","ror":"https://ror.org/024mw5h28","country_code":"US","type":"education","lineage":["https://openalex.org/I40347166"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Kuntai Du","raw_affiliation_strings":["University of Chicago"],"raw_orcid":"https://orcid.org/0000-0002-3964-4079","affiliations":[{"raw_affiliation_string":"University of Chicago","institution_ids":["https://openalex.org/I40347166"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5061448314","display_name":"Shan Lu","orcid":"https://orcid.org/0000-0002-0757-4600"},"institutions":[{"id":"https://openalex.org/I40347166","display_name":"University of Chicago","ror":"https://ror.org/024mw5h28","country_code":"US","type":"education","lineage":["https://openalex.org/I40347166"]},{"id":"https://openalex.org/I4210164937","display_name":"Microsoft Research (United Kingdom)","ror":"https://ror.org/05k87vq12","country_code":"GB","type":"company","lineage":["https://openalex.org/I1290206253","https://openalex.org/I4210164937"]}],"countries":["GB","US"],"is_corresponding":false,"raw_author_name":"Shan Lu","raw_affiliation_strings":["Microsoft Research / University of Chicago"],"raw_orcid":"https://orcid.org/0000-0002-0757-4600","affiliations":[{"raw_affiliation_string":"Microsoft Research / University of Chicago","institution_ids":["https://openalex.org/I4210164937","https://openalex.org/I40347166"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5103258769","display_name":"Junchen Jiang","orcid":"https://orcid.org/0000-0002-6877-1683"},"institutions":[{"id":"https://openalex.org/I40347166","display_name":"University of Chicago","ror":"https://ror.org/024mw5h28","country_code":"US","type":"education","lineage":["https://openalex.org/I40347166"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Junchen Jiang","raw_affiliation_strings":["University of Chicago"],"raw_orcid":"https://orcid.org/0000-0002-6877-1683","affiliations":[{"raw_affiliation_string":"University of Chicago","institution_ids":["https://openalex.org/I40347166"]}]}],"institutions":[],"countries_distinct_count":3,"institutions_distinct_count":9,"corresponding_author_ids":["https://openalex.org/A5058454592"],"corresponding_institution_ids":["https://openalex.org/I40347166","https://openalex.org/I889458895"],"apc_list":null,"apc_paid":null,"fwci":66.3359,"has_fulltext":false,"cited_by_count":36,"citation_normalized_percentile":{"value":0.99903086,"is_in_top_1_percent":true,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":99,"max":100},"biblio":{"volume":null,"issue":null,"first_page":"94","last_page":"109"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9995999932289124,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12031","display_name":"Speech and dialogue systems","score":0.9961000084877014,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7235682010650635},{"id":"https://openalex.org/keywords/cache","display_name":"Cache","score":0.6000972390174866},{"id":"https://openalex.org/keywords/fusion","display_name":"Fusion","score":0.47561508417129517},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.2596510648727417},{"id":"https://openalex.org/keywords/linguistics","display_name":"Linguistics","score":0.08447065949440002}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7235682010650635},{"id":"https://openalex.org/C115537543","wikidata":"https://www.wikidata.org/wiki/Q165596","display_name":"Cache","level":2,"score":0.6000972390174866},{"id":"https://openalex.org/C158525013","wikidata":"https://www.wikidata.org/wiki/Q2593739","display_name":"Fusion","level":2,"score":0.47561508417129517},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.2596510648727417},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.08447065949440002},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3689031.3696098","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3689031.3696098","pdf_url":null,"source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the Twentieth European Conference on Computer Systems","raw_type":"proceedings-article"}],"best_oa_location":{"id":"doi:10.1145/3689031.3696098","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3689031.3696098","pdf_url":null,"source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the Twentieth European Conference on Computer Systems","raw_type":"proceedings-article"},"sustainable_development_goals":[],"awards":[],"funders":[{"id":"https://openalex.org/F4320323817","display_name":"Universitas Brawijaya","ror":"https://ror.org/01wk3d929"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":30,"referenced_works":["https://openalex.org/W2173213060","https://openalex.org/W2970641574","https://openalex.org/W3190126809","https://openalex.org/W3214693004","https://openalex.org/W4287030984","https://openalex.org/W4299585995","https://openalex.org/W4382319938","https://openalex.org/W4385245566","https://openalex.org/W4386395487","https://openalex.org/W4387321091","https://openalex.org/W4387561549","https://openalex.org/W4387995158","https://openalex.org/W4388584589","https://openalex.org/W4388778348","https://openalex.org/W4388979610","https://openalex.org/W4389326242","https://openalex.org/W4389984066","https://openalex.org/W4392736881","https://openalex.org/W4392971568","https://openalex.org/W4393853379","https://openalex.org/W4394866643","https://openalex.org/W4399252473","https://openalex.org/W4402671835","https://openalex.org/W4404401017","https://openalex.org/W6734897383","https://openalex.org/W6739901393","https://openalex.org/W6778883912","https://openalex.org/W6862282697","https://openalex.org/W6862776294","https://openalex.org/W6891815739"],"related_works":["https://openalex.org/W4391375266","https://openalex.org/W2899084033","https://openalex.org/W2748952813","https://openalex.org/W2390279801","https://openalex.org/W4391913857","https://openalex.org/W2358668433","https://openalex.org/W4396701345","https://openalex.org/W2376932109","https://openalex.org/W2001405890","https://openalex.org/W4396696052"],"abstract_inverted_index":{"Large":[0],"language":[1],"models":[2],"(LLMs)":[3],"often":[4],"incorporate":[5],"multiple":[6],"text":[7,35,56],"chunks":[8,57],"in":[9],"their":[10],"inputs":[11],"to":[12],"provide":[13],"the":[14,20,23,30,38,42,47,54,61,75,79,83],"necessary":[15],"contexts.":[16],"To":[17],"speed":[18],"up":[19],"prefill":[21],"of":[22,33,49,85],"long":[24],"LLM":[25,51],"inputs,":[26],"one":[27],"can":[28],"pre-compute":[29],"KV":[31,39,67,87],"cache":[32,40],"a":[34],"and":[36],"re-use":[37],"when":[41],"context":[43],"is":[44],"reused":[45,55],"as":[46],"prefix":[48],"another":[50],"input.":[52],"However,":[53],"are":[58],"not":[59,69],"always":[60],"input":[62],"prefix,":[63],"which":[64],"makes":[65],"precomputed":[66],"caches":[68,88],"directly":[70],"usable":[71],"since":[72],"they":[73],"ignore":[74],"text's":[76],"cross-attention":[77],"with":[78],"preceding":[80],"texts.":[81],"Thus,":[82],"benefits":[84],"reusing":[86],"remain":[89],"largely":[90],"unrealized.":[91]},"counts_by_year":[{"year":2026,"cited_by_count":20},{"year":2025,"cited_by_count":16}],"updated_date":"2026-05-30T09:04:40.226872","created_date":"2025-10-10T00:00:00"}
