{"id":"https://openalex.org/W4414760881","doi":"https://doi.org/10.1145/3711875.3729141","title":"EdgeLoRA: An Efficient Multi-Tenant LLM Serving System on Edge Devices","display_name":"EdgeLoRA: An Efficient Multi-Tenant LLM Serving System on Edge Devices","publication_year":2025,"publication_date":"2025-06-23","ids":{"openalex":"https://openalex.org/W4414760881","doi":"https://doi.org/10.1145/3711875.3729141"},"language":"en","primary_location":{"id":"doi:10.1145/3711875.3729141","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3711875.3729141","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3711875.3729141","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 23rd Annual International Conference on Mobile Systems, Applications and Services","raw_type":"proceedings-article"},"type":"preprint","indexed_in":["arxiv","crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://dl.acm.org/doi/pdf/10.1145/3711875.3729141","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":null,"display_name":"Zheyu Shen","orcid":"https://orcid.org/0009-0001-3542-0713"},"institutions":[{"id":"https://openalex.org/I66946132","display_name":"University of Maryland, College Park","ror":"https://ror.org/047s2c258","country_code":"US","type":"education","lineage":["https://openalex.org/I66946132"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Zheyu Shen","raw_affiliation_strings":["Department of Electrical and Computer Engineering, University of Maryland College Park, College Park, MD, USA"],"affiliations":[{"raw_affiliation_string":"Department of Electrical and Computer Engineering, University of Maryland College Park, College Park, MD, USA","institution_ids":["https://openalex.org/I66946132"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5015835765","display_name":"Yexiao He","orcid":"https://orcid.org/0000-0002-4675-7733"},"institutions":[{"id":"https://openalex.org/I66946132","display_name":"University of Maryland, College Park","ror":"https://ror.org/047s2c258","country_code":"US","type":"education","lineage":["https://openalex.org/I66946132"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Yexiao He","raw_affiliation_strings":["Department of Electrical and Computer Engineering, University of Maryland College Park, College Park, MD, USA"],"affiliations":[{"raw_affiliation_string":"Department of Electrical and Computer Engineering, University of Maryland College Park, College Park, MD, USA","institution_ids":["https://openalex.org/I66946132"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Ziyao Wang","orcid":"https://orcid.org/0009-0007-9810-464X"},"institutions":[{"id":"https://openalex.org/I66946132","display_name":"University of Maryland, College Park","ror":"https://ror.org/047s2c258","country_code":"US","type":"education","lineage":["https://openalex.org/I66946132"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Ziyao Wang","raw_affiliation_strings":["Department of Electrical and Computer Engineering, University of Maryland College Park, College Park, MD, USA"],"affiliations":[{"raw_affiliation_string":"Department of Electrical and Computer Engineering, University of Maryland College Park, College Park, MD, USA","institution_ids":["https://openalex.org/I66946132"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5020749130","display_name":"Yunyao Zhang","orcid":null},"institutions":[{"id":"https://openalex.org/I66946132","display_name":"University of Maryland, College Park","ror":"https://ror.org/047s2c258","country_code":"US","type":"education","lineage":["https://openalex.org/I66946132"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Yuning Zhang","raw_affiliation_strings":["Department of Electrical and Computer Engineering, University of Maryland College Park, College Park, MD, USA"],"affiliations":[{"raw_affiliation_string":"Department of Electrical and Computer Engineering, University of Maryland College Park, College Park, MD, USA","institution_ids":["https://openalex.org/I66946132"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101230023","display_name":"Guoheng Sun","orcid":"https://orcid.org/0009-0004-4346-8516"},"institutions":[{"id":"https://openalex.org/I66946132","display_name":"University of Maryland, College Park","ror":"https://ror.org/047s2c258","country_code":"US","type":"education","lineage":["https://openalex.org/I66946132"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Guoheng Sun","raw_affiliation_strings":["Department of Electrical and Computer Engineering, University of Maryland College Park, College Park, MD, USA"],"affiliations":[{"raw_affiliation_string":"Department of Electrical and Computer Engineering, University of Maryland College Park, College Park, MD, USA","institution_ids":["https://openalex.org/I66946132"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5113324145","display_name":"Wanghao Ye","orcid":null},"institutions":[{"id":"https://openalex.org/I66946132","display_name":"University of Maryland, College Park","ror":"https://ror.org/047s2c258","country_code":"US","type":"education","lineage":["https://openalex.org/I66946132"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Wanghao Ye","raw_affiliation_strings":["Department of Electrical and Computer Engineering, University of Maryland College Park, College Park, MD, USA"],"affiliations":[{"raw_affiliation_string":"Department of Electrical and Computer Engineering, University of Maryland College Park, College Park, MD, USA","institution_ids":["https://openalex.org/I66946132"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5100413615","display_name":"Ang Li","orcid":"https://orcid.org/0000-0002-4990-1729"},"institutions":[{"id":"https://openalex.org/I66946132","display_name":"University of Maryland, College Park","ror":"https://ror.org/047s2c258","country_code":"US","type":"education","lineage":["https://openalex.org/I66946132"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Ang Li","raw_affiliation_strings":["Department of Electrical and Computer Engineering, University of Maryland College Park, College Park, MD, USA"],"affiliations":[{"raw_affiliation_string":"Department of Electrical and Computer Engineering, University of Maryland College Park, College Park, MD, USA","institution_ids":["https://openalex.org/I66946132"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":7,"corresponding_author_ids":[],"corresponding_institution_ids":["https://openalex.org/I66946132"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":true,"cited_by_count":0,"citation_normalized_percentile":{"value":0.14027605,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"138","last_page":"153"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12676","display_name":"Machine Learning and ELM","score":0.9846000075340271,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T12676","display_name":"Machine Learning and ELM","score":0.9846000075340271,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11181","display_name":"Advanced Data Storage Technologies","score":0.984000027179718,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11478","display_name":"Caching and Content Delivery","score":0.9822999835014343,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/adapter","display_name":"Adapter (computing)","score":0.7562000155448914},{"id":"https://openalex.org/keywords/scalability","display_name":"Scalability","score":0.5737000107765198},{"id":"https://openalex.org/keywords/edge-device","display_name":"Edge device","score":0.4846000075340271},{"id":"https://openalex.org/keywords/enhanced-data-rates-for-gsm-evolution","display_name":"Enhanced Data Rates for GSM Evolution","score":0.4652999937534332},{"id":"https://openalex.org/keywords/pooling","display_name":"Pooling","score":0.4203000068664551},{"id":"https://openalex.org/keywords/software-deployment","display_name":"Software deployment","score":0.4018000066280365},{"id":"https://openalex.org/keywords/adaptation","display_name":"Adaptation (eye)","score":0.35580000281333923}],"concepts":[{"id":"https://openalex.org/C177284502","wikidata":"https://www.wikidata.org/wiki/Q1005390","display_name":"Adapter (computing)","level":2,"score":0.7562000155448914},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6899999976158142},{"id":"https://openalex.org/C48044578","wikidata":"https://www.wikidata.org/wiki/Q727490","display_name":"Scalability","level":2,"score":0.5737000107765198},{"id":"https://openalex.org/C138236772","wikidata":"https://www.wikidata.org/wiki/Q25098575","display_name":"Edge device","level":3,"score":0.4846000075340271},{"id":"https://openalex.org/C162307627","wikidata":"https://www.wikidata.org/wiki/Q204833","display_name":"Enhanced Data Rates for GSM Evolution","level":2,"score":0.4652999937534332},{"id":"https://openalex.org/C120314980","wikidata":"https://www.wikidata.org/wiki/Q180634","display_name":"Distributed computing","level":1,"score":0.428600013256073},{"id":"https://openalex.org/C70437156","wikidata":"https://www.wikidata.org/wiki/Q7228652","display_name":"Pooling","level":2,"score":0.4203000068664551},{"id":"https://openalex.org/C105339364","wikidata":"https://www.wikidata.org/wiki/Q2297740","display_name":"Software deployment","level":2,"score":0.4018000066280365},{"id":"https://openalex.org/C139807058","wikidata":"https://www.wikidata.org/wiki/Q352374","display_name":"Adaptation (eye)","level":2,"score":0.35580000281333923},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.352400004863739},{"id":"https://openalex.org/C149635348","wikidata":"https://www.wikidata.org/wiki/Q193040","display_name":"Embedded system","level":1,"score":0.3443000018596649},{"id":"https://openalex.org/C2778456923","wikidata":"https://www.wikidata.org/wiki/Q5337692","display_name":"Edge computing","level":3,"score":0.3319999873638153},{"id":"https://openalex.org/C2742236","wikidata":"https://www.wikidata.org/wiki/Q924713","display_name":"Efficient energy use","level":2,"score":0.3098999857902527},{"id":"https://openalex.org/C82876162","wikidata":"https://www.wikidata.org/wiki/Q17096504","display_name":"Latency (audio)","level":2,"score":0.3084000051021576},{"id":"https://openalex.org/C179799912","wikidata":"https://www.wikidata.org/wiki/Q205084","display_name":"Computational complexity theory","level":2,"score":0.2847999930381775},{"id":"https://openalex.org/C31258907","wikidata":"https://www.wikidata.org/wiki/Q1301371","display_name":"Computer network","level":1,"score":0.2727999985218048},{"id":"https://openalex.org/C157764524","wikidata":"https://www.wikidata.org/wiki/Q1383412","display_name":"Throughput","level":3,"score":0.2624000012874603},{"id":"https://openalex.org/C38652104","wikidata":"https://www.wikidata.org/wiki/Q3510521","display_name":"Computer security","level":1,"score":0.2565999925136566}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1145/3711875.3729141","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3711875.3729141","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3711875.3729141","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 23rd Annual International Conference on Mobile Systems, Applications and Services","raw_type":"proceedings-article"},{"id":"pmh:oai:arXiv.org:2507.01438","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2507.01438","pdf_url":"https://arxiv.org/pdf/2507.01438","source":{"id":"https://openalex.org/S4393918464","display_name":"ArXiv.org","issn_l":"2331-8422","issn":["2331-8422"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"}],"best_oa_location":{"id":"doi:10.1145/3711875.3729141","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3711875.3729141","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3711875.3729141","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 23rd Annual International Conference on Mobile Systems, Applications and Services","raw_type":"proceedings-article"},"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G612080321","display_name":null,"funder_award_id":"2431611","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"}],"funders":[{"id":"https://openalex.org/F4320306076","display_name":"National Science Foundation","ror":"https://ror.org/021nxhr62"}],"has_content":{"pdf":true,"grobid_xml":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4414760881.pdf","grobid_xml":"https://content.openalex.org/works/W4414760881.grobid-xml"},"referenced_works_count":17,"referenced_works":["https://openalex.org/W2144499799","https://openalex.org/W2734941459","https://openalex.org/W2912924812","https://openalex.org/W2963929190","https://openalex.org/W3027042170","https://openalex.org/W3089472875","https://openalex.org/W3138516171","https://openalex.org/W3152703111","https://openalex.org/W3210314098","https://openalex.org/W4224308101","https://openalex.org/W4387321091","https://openalex.org/W4402670135","https://openalex.org/W4402860127","https://openalex.org/W4404356490","https://openalex.org/W4404401018","https://openalex.org/W4406650295","https://openalex.org/W6948966457"],"related_works":[],"abstract_inverted_index":{"Large":[0],"Language":[1],"Models":[2],"(LLMs)":[3],"have":[4],"gained":[5],"significant":[6,105],"attention":[7],"due":[8],"to":[9,31,34,134,150,165,200,214,232],"their":[10],"versatility":[11],"across":[12],"a":[13,242],"wide":[14],"array":[15],"of":[16,73,101,188,218,236],"applications.":[17],"Fine-tuning":[18],"LLMs":[19,42,61,116,237],"with":[20,205],"parameter-efficient":[21],"adapters,":[22],"such":[23,50],"as":[24,51],"Low-Rank":[25],"Adaptation":[26],"(LoRA),":[27],"enables":[28,161],"these":[29],"models":[30],"efficiently":[32,62],"adapt":[33],"downstream":[35],"tasks":[36],"without":[37,223],"extensive":[38],"retraining.":[39],"Deploying":[40],"fine-tuned":[41],"on":[43,63,117],"multi-tenant":[44,92,121,239],"edge":[45,65,118,234],"devices":[46,66,119],"offers":[47],"substantial":[48],"benefits,":[49],"reduced":[52],"latency,":[53],"enhanced":[54],"privacy,":[55],"and":[56,104,148,155,191,244],"personalized":[57],"responses.":[58],"However,":[59],"serving":[60,115],"resource-constrained":[64,248],"presents":[67],"critical":[68],"challenges,":[69],"including":[70],"the":[71,87,91,136,173,181],"complexity":[72],"adapter":[74,83,131,137,146],"selection":[75,132],"for":[76,114,247],"different":[77],"tasks,":[78],"memory":[79,142,152],"overhead":[80],"from":[81],"frequent":[82],"swapping.":[84],"Moreover,":[85],"given":[86],"multiple":[88],"requests":[89,95],"in":[90,99,120,186,203,238],"settings,":[93],"processing":[94,164],"sequentially":[96],"will":[97],"result":[98],"underutilization":[100],"computational":[102,168],"resources":[103],"latency.":[106,169],"This":[107],"paper":[108],"introduces":[109],"EdgeLoRA,":[110],"an":[111,129],"efficient":[112,162,245],"system":[113],"environments.":[122,249],"EdgeLoRA":[123,178,196],"incorporates":[124],"three":[125],"key":[126],"innovations:":[127],"(1)":[128],"adaptive":[130],"mechanism":[133],"streamline":[135],"configuration":[138],"process;":[139],"(2)":[140],"heterogeneous":[141],"management,":[143],"leveraging":[144],"intelligent":[145],"caching":[147],"pooling":[149],"mitigate":[151],"operation":[153],"overhead;":[154],"(3)":[156],"batch":[157,163],"LoRA":[158],"inference,":[159],"which":[160],"significantly":[166,179],"reduce":[167],"Comprehensive":[170],"evaluations":[171],"using":[172],"Llama3.1-8B":[174],"model":[175],"demonstrates":[176,195],"that":[177],"outperforms":[180],"status":[182],"quo":[183],"(i.e.,":[184],"llama.cpp)":[185],"terms":[187],"both":[189],"latency":[190],"throughput.":[192],"The":[193],"results":[194,228],"could":[197],"achieve":[198],"up":[199],"4\u00d7":[201],"boost":[202],"throughput":[204],"less":[206],"energy":[207],"consumption.":[208],"Even":[209],"more":[210,220],"impressively,":[211],"it":[212],"manages":[213],"serve":[215],"several":[216],"orders":[217],"magnitude":[219],"adapters":[221],"simultaneously":[222],"sacrificing":[224],"inference":[225],"performance.":[226],"These":[227],"highlight":[229],"EdgeLoRA's":[230],"potential":[231],"transform":[233],"deployment":[235],"scenarios,":[240],"offering":[241],"scalable":[243],"solution":[246]},"counts_by_year":[],"updated_date":"2026-04-09T08:11:56.329763","created_date":"2025-10-10T00:00:00"}
