{"id":"https://openalex.org/W7162298914","doi":"https://doi.org/10.48550/arxiv.2605.22850","title":"ObjectCache: Layerwise Object-Storage Retrieval for KV Cache Reuse","display_name":"ObjectCache: Layerwise Object-Storage Retrieval for KV Cache Reuse","publication_year":2026,"publication_date":"2026-05-16","ids":{"openalex":"https://openalex.org/W7162298914","doi":"https://doi.org/10.48550/arxiv.2605.22850"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2605.22850","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2605.22850","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2605.22850","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5136925333","display_name":"Yu Zhu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhu, Yu","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5077061884","display_name":"Aditya Dhakal","orcid":"https://orcid.org/0000-0002-8297-8525"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Dhakal, Aditya","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5045928410","display_name":"Yunming Xiao","orcid":"https://orcid.org/0000-0002-4913-4881"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Xiao, Yunming","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5136971421","display_name":"Dejan Milojicic","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Milojicic, Dejan","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5136923240","display_name":"Gustavo Alonso","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Alonso, Gustavo","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":5,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11181","display_name":"Advanced Data Storage Technologies","score":0.5952000021934509,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11181","display_name":"Advanced Data Storage Technologies","score":0.5952000021934509,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.21130000054836273,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10101","display_name":"Cloud Computing and Resource Management","score":0.10859999805688858,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/cache","display_name":"Cache","score":0.7498999834060669},{"id":"https://openalex.org/keywords/latency","display_name":"Latency (audio)","score":0.46160000562667847},{"id":"https://openalex.org/keywords/dram","display_name":"Dram","score":0.4390000104904175},{"id":"https://openalex.org/keywords/cpu-cache","display_name":"CPU cache","score":0.4083999991416931},{"id":"https://openalex.org/keywords/object-storage","display_name":"Object storage","score":0.40450000762939453},{"id":"https://openalex.org/keywords/schedule","display_name":"Schedule","score":0.39800000190734863},{"id":"https://openalex.org/keywords/reuse","display_name":"Reuse","score":0.38420000672340393},{"id":"https://openalex.org/keywords/transfer","display_name":"Transfer (computing)","score":0.37310001254081726},{"id":"https://openalex.org/keywords/bandwidth","display_name":"Bandwidth (computing)","score":0.36640000343322754},{"id":"https://openalex.org/keywords/key","display_name":"Key (lock)","score":0.35569998621940613}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8521999716758728},{"id":"https://openalex.org/C115537543","wikidata":"https://www.wikidata.org/wiki/Q165596","display_name":"Cache","level":2,"score":0.7498999834060669},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.5412999987602234},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.4666999876499176},{"id":"https://openalex.org/C82876162","wikidata":"https://www.wikidata.org/wiki/Q17096504","display_name":"Latency (audio)","level":2,"score":0.46160000562667847},{"id":"https://openalex.org/C7366592","wikidata":"https://www.wikidata.org/wiki/Q1255620","display_name":"Dram","level":2,"score":0.4390000104904175},{"id":"https://openalex.org/C189783530","wikidata":"https://www.wikidata.org/wiki/Q352090","display_name":"CPU cache","level":3,"score":0.4083999991416931},{"id":"https://openalex.org/C56640594","wikidata":"https://www.wikidata.org/wiki/Q7075068","display_name":"Object storage","level":3,"score":0.40450000762939453},{"id":"https://openalex.org/C68387754","wikidata":"https://www.wikidata.org/wiki/Q7271585","display_name":"Schedule","level":2,"score":0.39800000190734863},{"id":"https://openalex.org/C206588197","wikidata":"https://www.wikidata.org/wiki/Q846574","display_name":"Reuse","level":2,"score":0.38420000672340393},{"id":"https://openalex.org/C31258907","wikidata":"https://www.wikidata.org/wiki/Q1301371","display_name":"Computer network","level":1,"score":0.375},{"id":"https://openalex.org/C2776175482","wikidata":"https://www.wikidata.org/wiki/Q1195816","display_name":"Transfer (computing)","level":2,"score":0.37310001254081726},{"id":"https://openalex.org/C2776257435","wikidata":"https://www.wikidata.org/wiki/Q1576430","display_name":"Bandwidth (computing)","level":2,"score":0.36640000343322754},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.35569998621940613},{"id":"https://openalex.org/C36340418","wikidata":"https://www.wikidata.org/wiki/Q7124288","display_name":"Page cache","level":5,"score":0.3402999937534332},{"id":"https://openalex.org/C149635348","wikidata":"https://www.wikidata.org/wiki/Q193040","display_name":"Embedded system","level":1,"score":0.33160001039505005},{"id":"https://openalex.org/C206729178","wikidata":"https://www.wikidata.org/wiki/Q2271896","display_name":"Scheduling (production processes)","level":2,"score":0.3301999866962433},{"id":"https://openalex.org/C120314980","wikidata":"https://www.wikidata.org/wiki/Q180634","display_name":"Distributed computing","level":1,"score":0.3109000027179718},{"id":"https://openalex.org/C47487241","wikidata":"https://www.wikidata.org/wiki/Q5227230","display_name":"Data access","level":2,"score":0.30979999899864197},{"id":"https://openalex.org/C38556500","wikidata":"https://www.wikidata.org/wiki/Q13404475","display_name":"Cache algorithms","level":4,"score":0.30570000410079956},{"id":"https://openalex.org/C25536678","wikidata":"https://www.wikidata.org/wiki/Q5015977","display_name":"Cache invalidation","level":5,"score":0.3012999892234802},{"id":"https://openalex.org/C557945733","wikidata":"https://www.wikidata.org/wiki/Q389772","display_name":"Data transmission","level":2,"score":0.3012000024318695},{"id":"https://openalex.org/C9390403","wikidata":"https://www.wikidata.org/wiki/Q3966","display_name":"Computer hardware","level":1,"score":0.2964000105857849},{"id":"https://openalex.org/C201148951","wikidata":"https://www.wikidata.org/wiki/Q5015976","display_name":"Cache coloring","level":4,"score":0.2874999940395355},{"id":"https://openalex.org/C194739806","wikidata":"https://www.wikidata.org/wiki/Q66221","display_name":"Computer data storage","level":2,"score":0.27950000762939453},{"id":"https://openalex.org/C167713795","wikidata":"https://www.wikidata.org/wiki/Q352090","display_name":"Smart Cache","level":5,"score":0.2773999869823456},{"id":"https://openalex.org/C48145219","wikidata":"https://www.wikidata.org/wiki/Q1335365","display_name":"Security token","level":2,"score":0.2754000127315521},{"id":"https://openalex.org/C51185590","wikidata":"https://www.wikidata.org/wiki/Q1017228","display_name":"Bus sniffing","level":5,"score":0.27309998869895935},{"id":"https://openalex.org/C105122174","wikidata":"https://www.wikidata.org/wiki/Q322202","display_name":"Garbage collection","level":3,"score":0.27250000834465027},{"id":"https://openalex.org/C120936851","wikidata":"https://www.wikidata.org/wiki/Q1408065","display_name":"MESI protocol","level":5,"score":0.271699994802475},{"id":"https://openalex.org/C2781238097","wikidata":"https://www.wikidata.org/wiki/Q175026","display_name":"Object (grammar)","level":2,"score":0.27059999108314514},{"id":"https://openalex.org/C194080101","wikidata":"https://www.wikidata.org/wiki/Q46306","display_name":"Access time","level":2,"score":0.2669000029563904},{"id":"https://openalex.org/C133588205","wikidata":"https://www.wikidata.org/wiki/Q28455645","display_name":"Instruction prefetch","level":3,"score":0.26600000262260437},{"id":"https://openalex.org/C113166858","wikidata":"https://www.wikidata.org/wiki/Q5015981","display_name":"Cache pollution","level":5,"score":0.2628999948501587},{"id":"https://openalex.org/C1793878","wikidata":"https://www.wikidata.org/wiki/Q1153762","display_name":"Out-of-order execution","level":2,"score":0.26269999146461487},{"id":"https://openalex.org/C188045654","wikidata":"https://www.wikidata.org/wiki/Q17148339","display_name":"Memory bandwidth","level":2,"score":0.26030001044273376},{"id":"https://openalex.org/C141603448","wikidata":"https://www.wikidata.org/wiki/Q134830","display_name":"Prefix","level":2,"score":0.25529998540878296},{"id":"https://openalex.org/C144240696","wikidata":"https://www.wikidata.org/wiki/Q367204","display_name":"Address space","level":2,"score":0.25110000371932983}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2605.22850","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2605.22850","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2605.22850","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2605.22850","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Prefix":[0],"KV":[1,35,56,77,114],"caching":[2],"has":[3],"become":[4],"a":[5,26,72,136],"key":[6],"mechanism":[7],"in":[8,58,79,117,169],"LLM":[9],"serving:":[10],"it":[11],"reduces":[12,206],"time":[13],"to":[14,188],"first":[15],"token":[16],"(TTFT)":[17],"by":[18,209],"avoiding":[19],"redundant":[20],"computation":[21],"across":[22,129],"requests":[23],"that":[24,84,109,146],"share":[25],"prefix":[27],"(i.e.,":[28],"the":[29,33,55,76,89,93,102,110,118,120,195],"system":[30],"prompt).":[31],"However,":[32],"accumulated":[34],"cache":[36,57,78,115],"is":[37,86,186],"often":[38],"larger":[39],"than":[40],"what":[41],"GPU":[42,121],"memory":[43],"and":[44,65,105,149,158],"local":[45,178,197],"DRAM":[46,60],"can":[47],"hold.":[48],"To":[49],"preserve":[50],"latency,":[51],"current":[52],"systems":[53],"keep":[54],"remote":[59],"pools,":[61],"increasing":[62],"serving-cluster":[63],"size":[64],"cost.":[66],"In":[67],"this":[68],"paper,":[69],"we":[70],"explore":[71],"different":[73],"approach:":[74],"storing":[75],"S3-compatible":[80],"object":[81],"storage":[82,103,111,148,163],"so":[83,108],"capacity":[85],"no":[87],"longer":[88],"constraint,":[90],"while":[91],"minimizing":[92],"impact":[94],"on":[95,135],"TTFT.":[96],"We":[97,132],"propose":[98],"ObjectCache,":[99],"which":[100],"co-designs":[101],"protocol":[104],"transfer":[106,126],"schedule":[107],"server":[112],"delivers":[113],"data":[116,125],"order":[119],"consumes":[122],"it,":[123],"overlapping":[124],"with":[127,141,212],"compute":[128,185],"concurrent":[130],"requests.":[131],"prototype":[133],"ObjectCache":[134,172,191],"100":[137],"Gbps":[138],"RoCE":[139],"cluster":[140],"NIXL":[142],"(an":[143,153,160],"inference":[144],"library":[145],"abstracts":[147],"memory),":[150],"Ceph":[151],"RGW":[152],"Object":[154],"Gateway":[155],"for":[156,180],"clusters),":[157],"DAOS":[159],"open":[161],"source":[162],"system).":[164],"For":[165],"64K":[166],"contexts,":[167,182],"common":[168],"today's":[170],"systems,":[171],"adds":[173,192],"only":[174],"5.6\\%":[175],"latency":[176],"over":[177,194],"DRAM;":[179],"4K":[181],"where":[183],"less":[184],"available":[187],"mask":[189],"transfer,":[190],"56--75\\,ms":[193],"optimal":[196],"layerwise":[198],"baseline.":[199],"Under":[200],"shared":[201],"bandwidth":[202,214],"caps,":[203],"our":[204],"scheduler":[205],"added":[207],"TTFT":[208],"1.2--1.8x":[210],"compared":[211],"equal":[213],"sharing.":[215]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2026-05-26T00:00:00"}
