{"id":"https://openalex.org/W7133503349","doi":"https://doi.org/10.1109/hpca68181.2026.11408492","title":"ELORA: Efficient LoRA and KV Cache Management for Multi-LoRA LLM Serving","display_name":"ELORA: Efficient LoRA and KV Cache Management for Multi-LoRA LLM Serving","publication_year":2026,"publication_date":"2026-01-31","ids":{"openalex":"https://openalex.org/W7133503349","doi":"https://doi.org/10.1109/hpca68181.2026.11408492"},"language":null,"primary_location":{"id":"doi:10.1109/hpca68181.2026.11408492","is_oa":false,"landing_page_url":"https://doi.org/10.1109/hpca68181.2026.11408492","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2026 IEEE International Symposium on High Performance Computer Architecture (HPCA)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5020345102","display_name":"Jiuchen Shi","orcid":"https://orcid.org/0000-0002-5470-210X"},"institutions":[{"id":"https://openalex.org/I183067930","display_name":"Shanghai Jiao Tong University","ror":"https://ror.org/0220qvk04","country_code":"CN","type":"education","lineage":["https://openalex.org/I183067930"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Jiuchen Shi","raw_affiliation_strings":["Shanghai Jiao Tong University"],"affiliations":[{"raw_affiliation_string":"Shanghai Jiao Tong University","institution_ids":["https://openalex.org/I183067930"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5128088237","display_name":"Hang Zhang","orcid":null},"institutions":[{"id":"https://openalex.org/I183067930","display_name":"Shanghai Jiao Tong University","ror":"https://ror.org/0220qvk04","country_code":"CN","type":"education","lineage":["https://openalex.org/I183067930"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Hang Zhang","raw_affiliation_strings":["Shanghai Jiao Tong University"],"affiliations":[{"raw_affiliation_string":"Shanghai Jiao Tong University","institution_ids":["https://openalex.org/I183067930"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100651550","display_name":"Yixiao Wang","orcid":"https://orcid.org/0000-0003-2664-3605"},"institutions":[{"id":"https://openalex.org/I183067930","display_name":"Shanghai Jiao Tong University","ror":"https://ror.org/0220qvk04","country_code":"CN","type":"education","lineage":["https://openalex.org/I183067930"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yixiao Wang","raw_affiliation_strings":["Shanghai Jiao Tong University"],"affiliations":[{"raw_affiliation_string":"Shanghai Jiao Tong University","institution_ids":["https://openalex.org/I183067930"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100377840","display_name":"Quan Chen","orcid":"https://orcid.org/0000-0001-5832-0347"},"institutions":[{"id":"https://openalex.org/I183067930","display_name":"Shanghai Jiao Tong University","ror":"https://ror.org/0220qvk04","country_code":"CN","type":"education","lineage":["https://openalex.org/I183067930"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Quan Chen","raw_affiliation_strings":["Shanghai Jiao Tong University"],"affiliations":[{"raw_affiliation_string":"Shanghai Jiao Tong University","institution_ids":["https://openalex.org/I183067930"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5111117324","display_name":"Yizhou Shan","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yizhou Shan","raw_affiliation_strings":["Huawei Cloud"],"affiliations":[{"raw_affiliation_string":"Huawei Cloud","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5037447327","display_name":"Kaihua Fu","orcid":"https://orcid.org/0000-0001-5117-7162"},"institutions":[{"id":"https://openalex.org/I200769079","display_name":"Hong Kong University of Science and Technology","ror":"https://ror.org/00q4vv597","country_code":"HK","type":"education","lineage":["https://openalex.org/I200769079"]}],"countries":["HK"],"is_corresponding":false,"raw_author_name":"Kaihua Fu","raw_affiliation_strings":["Hong Kong University of Science and Technology"],"affiliations":[{"raw_affiliation_string":"Hong Kong University of Science and Technology","institution_ids":["https://openalex.org/I200769079"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5128123743","display_name":"Wei Wang","orcid":null},"institutions":[{"id":"https://openalex.org/I200769079","display_name":"Hong Kong University of Science and Technology","ror":"https://ror.org/00q4vv597","country_code":"HK","type":"education","lineage":["https://openalex.org/I200769079"]}],"countries":["HK"],"is_corresponding":false,"raw_author_name":"Wei Wang","raw_affiliation_strings":["Hong Kong University of Science and Technology"],"affiliations":[{"raw_affiliation_string":"Hong Kong University of Science and Technology","institution_ids":["https://openalex.org/I200769079"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5098559247","display_name":"Minyi Guo","orcid":null},"institutions":[{"id":"https://openalex.org/I183067930","display_name":"Shanghai Jiao Tong University","ror":"https://ror.org/0220qvk04","country_code":"CN","type":"education","lineage":["https://openalex.org/I183067930"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Minyi Guo","raw_affiliation_strings":["Shanghai Jiao Tong University"],"affiliations":[{"raw_affiliation_string":"Shanghai Jiao Tong University","institution_ids":["https://openalex.org/I183067930"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":8,"corresponding_author_ids":["https://openalex.org/A5020345102"],"corresponding_institution_ids":["https://openalex.org/I183067930"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.54707663,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"14"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12079","display_name":"IoT Networks and Protocols","score":0.2443999946117401,"subfield":{"id":"https://openalex.org/subfields/2208","display_name":"Electrical and Electronic Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T12079","display_name":"IoT Networks and Protocols","score":0.2443999946117401,"subfield":{"id":"https://openalex.org/subfields/2208","display_name":"Electrical and Electronic Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10080","display_name":"Energy Efficient Wireless Sensor Networks","score":0.1324000060558319,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10246","display_name":"Mobile Ad Hoc Networks","score":0.066600002348423,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/cache","display_name":"Cache","score":0.4250999987125397},{"id":"https://openalex.org/keywords/troubleshooting","display_name":"Troubleshooting","score":0.29120001196861267},{"id":"https://openalex.org/keywords/session","display_name":"Session (web analytics)","score":0.2750999927520752},{"id":"https://openalex.org/keywords/key","display_name":"Key (lock)","score":0.2702000141143799},{"id":"https://openalex.org/keywords/scheme","display_name":"Scheme (mathematics)","score":0.2556000053882599}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.5903000235557556},{"id":"https://openalex.org/C31258907","wikidata":"https://www.wikidata.org/wiki/Q1301371","display_name":"Computer network","level":1,"score":0.4372999966144562},{"id":"https://openalex.org/C115537543","wikidata":"https://www.wikidata.org/wiki/Q165596","display_name":"Cache","level":2,"score":0.4250999987125397},{"id":"https://openalex.org/C147494362","wikidata":"https://www.wikidata.org/wiki/Q2078905","display_name":"Troubleshooting","level":2,"score":0.29120001196861267},{"id":"https://openalex.org/C2779182362","wikidata":"https://www.wikidata.org/wiki/Q17126187","display_name":"Session (web analytics)","level":2,"score":0.2750999927520752},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.2702000141143799},{"id":"https://openalex.org/C149635348","wikidata":"https://www.wikidata.org/wiki/Q193040","display_name":"Embedded system","level":1,"score":0.2662999927997589},{"id":"https://openalex.org/C77618280","wikidata":"https://www.wikidata.org/wiki/Q1155772","display_name":"Scheme (mathematics)","level":2,"score":0.2556000053882599},{"id":"https://openalex.org/C43521106","wikidata":"https://www.wikidata.org/wiki/Q2165493","display_name":"Pipeline (software)","level":2,"score":0.2529999911785126},{"id":"https://openalex.org/C18762648","wikidata":"https://www.wikidata.org/wiki/Q42213","display_name":"Work (physics)","level":2,"score":0.251800000667572},{"id":"https://openalex.org/C76155785","wikidata":"https://www.wikidata.org/wiki/Q418","display_name":"Telecommunications","level":1,"score":0.2515000104904175}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/hpca68181.2026.11408492","is_oa":false,"landing_page_url":"https://doi.org/10.1109/hpca68181.2026.11408492","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2026 IEEE International Symposium on High Performance Computer Architecture (HPCA)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G1616349796","display_name":null,"funder_award_id":"2024YFB4505703","funder_id":"https://openalex.org/F4320335777","funder_display_name":"National Key Research and Development Program of China"},{"id":"https://openalex.org/G4179261564","display_name":null,"funder_award_id":"62232011,62302302","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G4190178005","display_name":null,"funder_award_id":"25ZR1402241","funder_id":"https://openalex.org/F4320309612","funder_display_name":"Natural Science Foundation of Shanghai"}],"funders":[{"id":"https://openalex.org/F4320309612","display_name":"Natural Science Foundation of Shanghai","ror":null},{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"},{"id":"https://openalex.org/F4320335777","display_name":"National Key Research and Development Program of China","ror":null}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":21,"referenced_works":["https://openalex.org/W2511832088","https://openalex.org/W2964347220","https://openalex.org/W2985067290","https://openalex.org/W3017454464","https://openalex.org/W3095319910","https://openalex.org/W3174770825","https://openalex.org/W3205898353","https://openalex.org/W4205991051","https://openalex.org/W4239937081","https://openalex.org/W4253438286","https://openalex.org/W4385570973","https://openalex.org/W4387321091","https://openalex.org/W4402671659","https://openalex.org/W4408847395","https://openalex.org/W4408848702","https://openalex.org/W4408894144","https://openalex.org/W4411403450","https://openalex.org/W4415797413","https://openalex.org/W4415799029","https://openalex.org/W7133233472","https://openalex.org/W7133239970"],"related_works":[],"abstract_inverted_index":{"Multiple":[0],"Low-Rank":[1],"Adapters":[2],"(Multi-LoRA)":[3],"are":[4],"gaining":[5],"popularity":[6],"for":[7],"task-specific":[8],"Large":[9],"Language":[10],"Model":[11],"(LLM)":[12],"applications.":[13],"For":[14],"Multi-LoRA":[15,33,58],"serving,":[16],"caching":[17,48,59,94],"hot":[18],"LoRAs":[19,49,85,105],"and":[20,50,72,86,106],"KV":[21,51,87,107],"caches":[22,88,108],"in":[23],"the":[24,63,81,100,116,130],"GPU":[25,117],"memory":[26,118],"can":[27],"improve":[28],"inference":[29,34,90],"performance.":[30,65],"However,":[31],"existing":[32],"systems":[35],"fail":[36],"to":[37,61,142],"optimize":[38,62],"serving":[39,64],"performance":[40],"like":[41],"Time-To-First-Token":[42],"(TTFT),":[43],"neglecting":[44],"usage":[45,82],"dependencies":[46,83],"when":[47,115],"caches.":[52],"We":[53],"therefore":[54],"propose":[55],"ELORA,":[56],"a":[57,68,73,92,111],"system":[60],"ELORA":[66,128],"comprises":[67],"dependency-aware":[69],"cache":[70,75,78,97],"manager":[71,79],"performancedriven":[74],"swapper.":[76],"The":[77,96],"maintains":[80],"between":[84],"during":[89],"with":[91],"unified":[93,112],"pool.":[95],"swapper":[98],"determines":[99],"swap-in":[101],"or":[102,121],"swap-out":[103],"of":[104],"based":[109],"on":[110,139],"cost":[113],"model,":[114],"is":[119],"idle":[120],"busy,":[122],"respectively.":[123],"Experimental":[124],"results":[125],"show":[126],"that":[127],"reduces":[129],"TTFT":[131],"by":[132],"<tex":[133],"xmlns:mml=\"http://www.w3.org/1998/Math/MathML\"":[134],"xmlns:xlink=\"http://www.w3.org/1999/xlink\">$\\mathbf{4":[135],"5.":[136],"7":[137],"\\%}$</tex>":[138],"average,":[140],"compared":[141],"state-of-the-art":[143],"works.":[144]},"counts_by_year":[],"updated_date":"2026-04-09T08:11:56.329763","created_date":"2026-03-05T00:00:00"}
