{"id":"https://openalex.org/W4408738433","doi":"https://doi.org/10.1145/3719330.3721230","title":"An I/O Characterizing Study of Offloading LLM Models and KV Caches to NVMe SSD","display_name":"An I/O Characterizing Study of Offloading LLM Models and KV Caches to NVMe SSD","publication_year":2025,"publication_date":"2025-03-22","ids":{"openalex":"https://openalex.org/W4408738433","doi":"https://doi.org/10.1145/3719330.3721230"},"language":"en","primary_location":{"id":"doi:10.1145/3719330.3721230","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3719330.3721230","pdf_url":null,"source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 5th Workshop on Challenges and Opportunities of Efficient and Performant Storage Systems","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://doi.org/10.1145/3719330.3721230","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5037945597","display_name":"Zebin Ren","orcid":"https://orcid.org/0000-0003-1466-0002"},"institutions":[{"id":"https://openalex.org/I865915315","display_name":"Vrije Universiteit Amsterdam","ror":"https://ror.org/008xxew50","country_code":"NL","type":"education","lineage":["https://openalex.org/I865915315"]}],"countries":["NL"],"is_corresponding":true,"raw_author_name":"Zebin Ren","raw_affiliation_strings":["Vrije Universiteit Amsterdam, Amsterdam, The Netherlands"],"raw_orcid":"https://orcid.org/0000-0003-1466-0002","affiliations":[{"raw_affiliation_string":"Vrije Universiteit Amsterdam, Amsterdam, The Netherlands","institution_ids":["https://openalex.org/I865915315"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5085081429","display_name":"Krijn Doekemeijer","orcid":"https://orcid.org/0009-0007-7530-4438"},"institutions":[{"id":"https://openalex.org/I865915315","display_name":"Vrije Universiteit Amsterdam","ror":"https://ror.org/008xxew50","country_code":"NL","type":"education","lineage":["https://openalex.org/I865915315"]}],"countries":["NL"],"is_corresponding":false,"raw_author_name":"Krijn Doekemeijer","raw_affiliation_strings":["Vrije Universiteit Amsterdam, Amsterdam, The Netherlands"],"raw_orcid":"https://orcid.org/0009-0007-7530-4438","affiliations":[{"raw_affiliation_string":"Vrije Universiteit Amsterdam, Amsterdam, The Netherlands","institution_ids":["https://openalex.org/I865915315"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5042851074","display_name":"Tiziano De Matteis","orcid":"https://orcid.org/0000-0002-9158-6849"},"institutions":[{"id":"https://openalex.org/I865915315","display_name":"Vrije Universiteit Amsterdam","ror":"https://ror.org/008xxew50","country_code":"NL","type":"education","lineage":["https://openalex.org/I865915315"]}],"countries":["NL"],"is_corresponding":false,"raw_author_name":"Tiziano De Matteis","raw_affiliation_strings":["Vrije Universiteit Amsterdam, Amsterdam, The Netherlands"],"raw_orcid":"https://orcid.org/0000-0002-9158-6849","affiliations":[{"raw_affiliation_string":"Vrije Universiteit Amsterdam, Amsterdam, The Netherlands","institution_ids":["https://openalex.org/I865915315"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5031211716","display_name":"Christian Pinto","orcid":"https://orcid.org/0000-0001-7060-2742"},"institutions":[{"id":"https://openalex.org/I4210145784","display_name":"IBM Research - Ireland","ror":"https://ror.org/04jnxr720","country_code":"IE","type":"facility","lineage":["https://openalex.org/I1341412227","https://openalex.org/I4210114115","https://openalex.org/I4210145784"]}],"countries":["IE"],"is_corresponding":false,"raw_author_name":"Christian Pinto","raw_affiliation_strings":["IBM Research Europe, Dublin, Ireland"],"raw_orcid":"https://orcid.org/0000-0001-7060-2742","affiliations":[{"raw_affiliation_string":"IBM Research Europe, Dublin, Ireland","institution_ids":["https://openalex.org/I4210145784"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5091823418","display_name":"Radu Stoica","orcid":"https://orcid.org/0009-0005-8089-866X"},"institutions":[{"id":"https://openalex.org/I4210126328","display_name":"IBM Research - Zurich","ror":"https://ror.org/02js37d36","country_code":"CH","type":"facility","lineage":["https://openalex.org/I1341412227","https://openalex.org/I4210114115","https://openalex.org/I4210126328"]}],"countries":["CH"],"is_corresponding":false,"raw_author_name":"Radu Stoica","raw_affiliation_strings":["IBM Research Europe, Zurich, Switzerland"],"raw_orcid":"https://orcid.org/0009-0005-8089-866X","affiliations":[{"raw_affiliation_string":"IBM Research Europe, Zurich, Switzerland","institution_ids":["https://openalex.org/I4210126328"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5002623098","display_name":"Animesh Trivedi","orcid":"https://orcid.org/0000-0003-3586-7168"},"institutions":[{"id":"https://openalex.org/I4210126328","display_name":"IBM Research - Zurich","ror":"https://ror.org/02js37d36","country_code":"CH","type":"facility","lineage":["https://openalex.org/I1341412227","https://openalex.org/I4210114115","https://openalex.org/I4210126328"]}],"countries":["CH"],"is_corresponding":false,"raw_author_name":"Animesh Trivedi","raw_affiliation_strings":["IBM Research Europe, Zurich, Switzerland"],"raw_orcid":"https://orcid.org/0000-0003-3586-7168","affiliations":[{"raw_affiliation_string":"IBM Research Europe, Zurich, Switzerland","institution_ids":["https://openalex.org/I4210126328"]}]}],"institutions":[],"countries_distinct_count":3,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5037945597"],"corresponding_institution_ids":["https://openalex.org/I865915315"],"apc_list":null,"apc_paid":null,"fwci":11.0302,"has_fulltext":false,"cited_by_count":9,"citation_normalized_percentile":{"value":0.98193833,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":97,"max":100},"biblio":{"volume":null,"issue":null,"first_page":"23","last_page":"33"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11181","display_name":"Advanced Data Storage Technologies","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11181","display_name":"Advanced Data Storage Technologies","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11478","display_name":"Caching and Content Delivery","score":0.9772999882698059,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12162","display_name":"Cellular Automata and Applications","score":0.9609000086784363,"subfield":{"id":"https://openalex.org/subfields/1703","display_name":"Computational Theory and Mathematics"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6055915355682373},{"id":"https://openalex.org/keywords/operating-system","display_name":"Operating system","score":0.4847699999809265},{"id":"https://openalex.org/keywords/embedded-system","display_name":"Embedded system","score":0.4017813205718994}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6055915355682373},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.4847699999809265},{"id":"https://openalex.org/C149635348","wikidata":"https://www.wikidata.org/wiki/Q193040","display_name":"Embedded system","level":1,"score":0.4017813205718994}],"mesh":[],"locations_count":3,"locations":[{"id":"doi:10.1145/3719330.3721230","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3719330.3721230","pdf_url":null,"source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 5th Workshop on Challenges and Opportunities of Efficient and Performant Storage Systems","raw_type":"proceedings-article"},{"id":"pmh:oai:research.vu.nl:openaire/6b8262a7-d868-4a07-83a3-94dd71ec6b04","is_oa":true,"landing_page_url":"https://research.vu.nl/en/publications/6b8262a7-d868-4a07-83a3-94dd71ec6b04","pdf_url":null,"source":{"id":"https://openalex.org/S4306401107","display_name":"VU Research Portal","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I865915315","host_organization_name":"Vrije Universiteit Amsterdam","host_organization_lineage":["https://openalex.org/I865915315"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Ren, Z, Doekemeijer, K, De Matteis, T, Pinto, C, Stoica, R & Trivedi, A 2025, An I/O Characterizing Study of Offloading LLM Models and KV Caches to NVMe SSD. in CHEOPS '25 : Proceedings of the 5th Workshop on Challenges and Opportunities of Efficient and Performant Storage Systems. Association for Computing Machinery, Inc, pp. 23-33, 5th Workshop on Challenges and Opportunities of Efficient and Performant Storage Systems, CHEOPS 2025, Rotterdam, Netherlands, 31/03/25. https://doi.org/10.1145/3719330.3721230","raw_type":"info:eu-repo/semantics/publishedVersion"},{"id":"pmh:oai:research.vu.nl:publications/6b8262a7-d868-4a07-83a3-94dd71ec6b04","is_oa":true,"landing_page_url":"https://hdl.handle.net/1871.1/6b8262a7-d868-4a07-83a3-94dd71ec6b04","pdf_url":null,"source":{"id":"https://openalex.org/S4306401107","display_name":"VU Research Portal","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I865915315","host_organization_name":"Vrije Universiteit Amsterdam","host_organization_lineage":["https://openalex.org/I865915315"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Ren, Z, Doekemeijer, K, De Matteis, T, Pinto, C, Stoica, R & Trivedi, A 2025, An I/O Characterizing Study of Offloading LLM Models and KV Caches to NVMe SSD. in CHEOPS '25 : Proceedings of the 5th Workshop on Challenges and Opportunities of Efficient and Performant Storage Systems. Association for Computing Machinery, Inc, pp. 23-33, 5th Workshop on Challenges and Opportunities of Efficient and Performant Storage Systems, CHEOPS 2025, Rotterdam, Netherlands, 31/03/25. https://doi.org/10.1145/3719330.3721230","raw_type":"info:eu-repo/semantics/publishedVersion"}],"best_oa_location":{"id":"doi:10.1145/3719330.3721230","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3719330.3721230","pdf_url":null,"source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 5th Workshop on Challenges and Opportunities of Efficient and Performant Storage Systems","raw_type":"proceedings-article"},"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G6226168429","display_name":null,"funder_award_id":"OCENW.KLEIN.561","funder_id":"https://openalex.org/F4320323817","funder_display_name":"Universitas Brawijaya"}],"funders":[{"id":"https://openalex.org/F4320323817","display_name":"Universitas Brawijaya","ror":"https://ror.org/01wk3d929"}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":27,"referenced_works":["https://openalex.org/W95608104","https://openalex.org/W398859631","https://openalex.org/W2612690371","https://openalex.org/W2751343396","https://openalex.org/W3205803342","https://openalex.org/W4229005866","https://openalex.org/W4281752694","https://openalex.org/W4292973577","https://openalex.org/W4321636575","https://openalex.org/W4360831773","https://openalex.org/W4386826409","https://openalex.org/W4387321091","https://openalex.org/W4388855645","https://openalex.org/W4390041933","https://openalex.org/W4391012746","https://openalex.org/W4391160280","https://openalex.org/W4393942918","https://openalex.org/W4396746969","https://openalex.org/W4400377444","https://openalex.org/W4400582844","https://openalex.org/W4401176373","https://openalex.org/W4401211704","https://openalex.org/W4402671659","https://openalex.org/W4402806483","https://openalex.org/W4403617301","https://openalex.org/W4404401017","https://openalex.org/W4404401018"],"related_works":["https://openalex.org/W4391375266","https://openalex.org/W2899084033","https://openalex.org/W2748952813","https://openalex.org/W2390279801","https://openalex.org/W4391913857","https://openalex.org/W2358668433","https://openalex.org/W4396701345","https://openalex.org/W2376932109","https://openalex.org/W2001405890","https://openalex.org/W4396696052"],"abstract_inverted_index":{"With":[0],"the":[1,12,46,61,68,72,75,87,97,111,129,144,191,196,214,227,246,263,266],"popularity":[2],"of":[3,11,22,24,74,89,108,117,128,132,169,199,230,249,269],"generative":[4],"AI,":[5],"LLM":[6,42,151],"inference":[7,43,152],"has":[8],"become":[9],"one":[10],"most":[13],"popular":[14,18],"cloud":[15],"workloads.":[16],"Modern":[17],"LLMs":[19],"have":[20,83,124],"hundreds":[21],"billions":[23],"parameters":[25],"and":[26,64,77,100,114,142,155,160,187,211,225,237,265],"support":[27,158],"very":[28],"large":[29],"input/output":[30],"prompt":[31],"token":[32],"sizes":[33],"(100K-1M).":[34],"As":[35,71],"a":[36,106,125],"result,":[37],"their":[38],"computational":[39],"state":[40],"during":[41],"can":[44],"exceed":[45],"memory":[47,56],"available":[48],"on":[49,110],"GPUs.":[50],"One":[51],"solution":[52],"to":[53,59,67,80,85,95,123,164],"this":[54,137,270],"GPU":[55],"problem":[57],"is":[58,105,202,251],"offload":[60],"model":[62,98,159,200,218],"weights":[63,99],"KV":[65,101,161,231],"cache":[66,162,232],"host":[69],"memory.":[70],"size":[73],"models":[76],"prompts":[78],"continue":[79],"increase,":[81],"researchers":[82],"started":[84],"explore":[86],"use":[88],"secondary":[90],"storage,":[91],"such":[92],"as":[93],"SSDs,":[94],"store":[96],"cache.":[102],"However,":[103],"there":[104],"lack":[107],"study":[109],"I/O":[112,147,171,182,197,228,267],"characteristics":[113,131],"performance":[115,130],"requirements":[116],"these":[118,133,170],"offloading":[119,134,163,179,201,219,233],"operations.":[120],"In":[121],"order":[122],"better":[126],"understanding":[127],"operations,":[135],"in":[136,213],"work,":[138],"we":[139,173],"collect,":[140],"study,":[141],"characterize":[143],"block":[145,215],"layer":[146],"traces":[148,268],"from":[149],"two":[150],"frameworks,":[153],"DeepSpeed":[154,210],"FlexGen,":[156],"that":[157],"SSDs.":[165],"Through":[166],"our":[167],"analysis":[168],"traces,":[172],"report":[174],"that:":[175],"(i)":[176],"libaio-based":[177],"tensor":[178],"delivers":[180],"higher":[181,253],"bandwidth":[183,248],"for":[184,208],"both":[185,209,235],"writing":[186],"reading":[188],"tensors":[189],"to/from":[190],"SSDs":[192],"than":[193,254],"POSIX;":[194],"(ii)":[195],"workload":[198,229],"dominated":[203,240],"by":[204,241],"128":[205,242],"KiB":[206,243],"reads":[207],"FlexGen":[212],"layer;":[216],"(iii)":[217],"does":[220],"not":[221],"saturate":[222],"NVMe":[223],"SSDs;":[224],"(iv)":[226],"contains":[234],"read":[236,250],"write":[238,255],"workloads":[239],"requests,":[244],"but":[245],"average":[247],"much":[252],"(2.0":[256],"GiB/s":[257],"vs.":[258],"11.0":[259],"MiB/s).":[260],"We":[261],"open-source":[262],"scripts":[264],"work":[271],"at":[272],"https://github.com/stonet-research/cheops25-IO-characterization-of-LLM-model-kv-cache-offloading-nvme":[273]},"counts_by_year":[{"year":2026,"cited_by_count":5},{"year":2025,"cited_by_count":4}],"updated_date":"2026-05-19T08:33:51.333923","created_date":"2025-10-10T00:00:00"}
