{"id":"https://openalex.org/W4405934565","doi":"https://doi.org/10.1109/jiot.2024.3524255","title":"EdgeShard: Efficient LLM Inference via Collaborative Edge Computing","display_name":"EdgeShard: Efficient LLM Inference via Collaborative Edge Computing","publication_year":2024,"publication_date":"2024-12-31","ids":{"openalex":"https://openalex.org/W4405934565","doi":"https://doi.org/10.1109/jiot.2024.3524255"},"language":"en","primary_location":{"id":"doi:10.1109/jiot.2024.3524255","is_oa":false,"landing_page_url":"https://doi.org/10.1109/jiot.2024.3524255","pdf_url":null,"source":{"id":"https://openalex.org/S2480266640","display_name":"IEEE Internet of Things Journal","issn_l":"2327-4662","issn":["2327-4662","2372-2541"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Internet of Things Journal","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5106557838","display_name":"Mingjin Zhang","orcid":"https://orcid.org/0000-0002-1653-108X"},"institutions":[{"id":"https://openalex.org/I14243506","display_name":"Hong Kong Polytechnic University","ror":"https://ror.org/0030zas98","country_code":"HK","type":"education","lineage":["https://openalex.org/I14243506"]}],"countries":["HK"],"is_corresponding":true,"raw_author_name":"Mingjin Zhang","raw_affiliation_strings":["Department of Computing, The Hong Kong Polytechnic University, Hong Kong, SAR, China","Department of Computing, The Hong Kong Polytechnic University, Hong Kong"],"raw_orcid":"https://orcid.org/0000-0002-1653-108X","affiliations":[{"raw_affiliation_string":"Department of Computing, The Hong Kong Polytechnic University, Hong Kong, SAR, China","institution_ids":["https://openalex.org/I14243506"]},{"raw_affiliation_string":"Department of Computing, The Hong Kong Polytechnic University, Hong Kong","institution_ids":["https://openalex.org/I14243506"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5067380011","display_name":"Xiaoming Shen","orcid":"https://orcid.org/0000-0002-8004-0074"},"institutions":[{"id":"https://openalex.org/I14243506","display_name":"Hong Kong Polytechnic University","ror":"https://ror.org/0030zas98","country_code":"HK","type":"education","lineage":["https://openalex.org/I14243506"]}],"countries":["HK"],"is_corresponding":false,"raw_author_name":"Xiaoming Shen","raw_affiliation_strings":["Department of Computing, The Hong Kong Polytechnic University, Hong Kong, SAR, China","Department of Computing, The Hong Kong Polytechnic University, Hong Kong"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Department of Computing, The Hong Kong Polytechnic University, Hong Kong, SAR, China","institution_ids":["https://openalex.org/I14243506"]},{"raw_affiliation_string":"Department of Computing, The Hong Kong Polytechnic University, Hong Kong","institution_ids":["https://openalex.org/I14243506"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100740023","display_name":"Jiannong Cao","orcid":"https://orcid.org/0000-0002-2725-2529"},"institutions":[{"id":"https://openalex.org/I14243506","display_name":"Hong Kong Polytechnic University","ror":"https://ror.org/0030zas98","country_code":"HK","type":"education","lineage":["https://openalex.org/I14243506"]}],"countries":["HK"],"is_corresponding":false,"raw_author_name":"Jiannong Cao","raw_affiliation_strings":["Department of Computing, The Hong Kong Polytechnic University, Hong Kong, SAR, China","Department of Computing, The Hong Kong Polytechnic University, Hong Kong"],"raw_orcid":"https://orcid.org/0000-0002-2725-2529","affiliations":[{"raw_affiliation_string":"Department of Computing, The Hong Kong Polytechnic University, Hong Kong, SAR, China","institution_ids":["https://openalex.org/I14243506"]},{"raw_affiliation_string":"Department of Computing, The Hong Kong Polytechnic University, Hong Kong","institution_ids":["https://openalex.org/I14243506"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5103117973","display_name":"Zeyang Cui","orcid":"https://orcid.org/0000-0003-0838-9594"},"institutions":[{"id":"https://openalex.org/I14243506","display_name":"Hong Kong Polytechnic University","ror":"https://ror.org/0030zas98","country_code":"HK","type":"education","lineage":["https://openalex.org/I14243506"]}],"countries":["HK"],"is_corresponding":false,"raw_author_name":"Zeyang Cui","raw_affiliation_strings":["Department of Computing, The Hong Kong Polytechnic University, Hong Kong, SAR, China","Department of Computing, The Hong Kong Polytechnic University, Hong Kong"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Department of Computing, The Hong Kong Polytechnic University, Hong Kong, SAR, China","institution_ids":["https://openalex.org/I14243506"]},{"raw_affiliation_string":"Department of Computing, The Hong Kong Polytechnic University, Hong Kong","institution_ids":["https://openalex.org/I14243506"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5055870001","display_name":"Shan Jiang","orcid":"https://orcid.org/0000-0002-4727-4856"},"institutions":[{"id":"https://openalex.org/I14243506","display_name":"Hong Kong Polytechnic University","ror":"https://ror.org/0030zas98","country_code":"HK","type":"education","lineage":["https://openalex.org/I14243506"]}],"countries":["HK"],"is_corresponding":false,"raw_author_name":"Shan Jiang","raw_affiliation_strings":["Department of Computing, The Hong Kong Polytechnic University, Hong Kong, SAR, China","Department of Computing, The Hong Kong Polytechnic University, Hong Kong"],"raw_orcid":"https://orcid.org/0000-0002-4727-4856","affiliations":[{"raw_affiliation_string":"Department of Computing, The Hong Kong Polytechnic University, Hong Kong, SAR, China","institution_ids":["https://openalex.org/I14243506"]},{"raw_affiliation_string":"Department of Computing, The Hong Kong Polytechnic University, Hong Kong","institution_ids":["https://openalex.org/I14243506"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5106557838"],"corresponding_institution_ids":["https://openalex.org/I14243506"],"apc_list":null,"apc_paid":null,"fwci":87.2722,"has_fulltext":false,"cited_by_count":120,"citation_normalized_percentile":{"value":0.99975086,"is_in_top_1_percent":true,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":97,"max":100},"biblio":{"volume":"12","issue":"10","first_page":"13119","last_page":"13131"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10270","display_name":"Blockchain Technology Applications and Security","score":0.7892000079154968,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10270","display_name":"Blockchain Technology Applications and Security","score":0.7892000079154968,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T13999","display_name":"Digital Rights Management and Security","score":0.7775999903678894,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11614","display_name":"Cloud Data Security Solutions","score":0.7749999761581421,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8277443647384644},{"id":"https://openalex.org/keywords/inference","display_name":"Inference","score":0.6201832294464111},{"id":"https://openalex.org/keywords/edge-computing","display_name":"Edge computing","score":0.5371431112289429},{"id":"https://openalex.org/keywords/enhanced-data-rates-for-gsm-evolution","display_name":"Enhanced Data Rates for GSM Evolution","score":0.4848957657814026},{"id":"https://openalex.org/keywords/distributed-computing","display_name":"Distributed computing","score":0.362801194190979},{"id":"https://openalex.org/keywords/theoretical-computer-science","display_name":"Theoretical computer science","score":0.35778945684432983},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.24880626797676086}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8277443647384644},{"id":"https://openalex.org/C2776214188","wikidata":"https://www.wikidata.org/wiki/Q408386","display_name":"Inference","level":2,"score":0.6201832294464111},{"id":"https://openalex.org/C2778456923","wikidata":"https://www.wikidata.org/wiki/Q5337692","display_name":"Edge computing","level":3,"score":0.5371431112289429},{"id":"https://openalex.org/C162307627","wikidata":"https://www.wikidata.org/wiki/Q204833","display_name":"Enhanced Data Rates for GSM Evolution","level":2,"score":0.4848957657814026},{"id":"https://openalex.org/C120314980","wikidata":"https://www.wikidata.org/wiki/Q180634","display_name":"Distributed computing","level":1,"score":0.362801194190979},{"id":"https://openalex.org/C80444323","wikidata":"https://www.wikidata.org/wiki/Q2878974","display_name":"Theoretical computer science","level":1,"score":0.35778945684432983},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.24880626797676086}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/jiot.2024.3524255","is_oa":false,"landing_page_url":"https://doi.org/10.1109/jiot.2024.3524255","pdf_url":null,"source":{"id":"https://openalex.org/S2480266640","display_name":"IEEE Internet of Things Journal","issn_l":"2327-4662","issn":["2327-4662","2372-2541"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Internet of Things Journal","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[{"display_name":"Industry, innovation and infrastructure","score":0.4099999964237213,"id":"https://metadata.un.org/sdg/9"}],"awards":[{"id":"https://openalex.org/G4044893837","display_name":null,"funder_award_id":"C5032-23G","funder_id":"https://openalex.org/F4320327177","funder_display_name":"Hong Kong Institute for Monetary Research"}],"funders":[{"id":"https://openalex.org/F4320327177","display_name":"Hong Kong Institute for Monetary Research","ror":null}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":37,"referenced_works":["https://openalex.org/W2416799949","https://openalex.org/W2896180420","https://openalex.org/W2920031528","https://openalex.org/W2952667345","https://openalex.org/W2969388332","https://openalex.org/W2981114133","https://openalex.org/W3129519206","https://openalex.org/W3130823781","https://openalex.org/W3186281912","https://openalex.org/W3214376792","https://openalex.org/W4213044365","https://openalex.org/W4236099117","https://openalex.org/W4283204791","https://openalex.org/W4312438118","https://openalex.org/W4313524839","https://openalex.org/W4385245566","https://openalex.org/W4390659326","https://openalex.org/W4393160423","https://openalex.org/W4393372064","https://openalex.org/W4401211704","https://openalex.org/W4403421327","https://openalex.org/W4404687927","https://openalex.org/W6727099177","https://openalex.org/W6755207826","https://openalex.org/W6756718674","https://openalex.org/W6842601026","https://openalex.org/W6846164622","https://openalex.org/W6847478871","https://openalex.org/W6852800892","https://openalex.org/W6853048723","https://openalex.org/W6854613406","https://openalex.org/W6854866820","https://openalex.org/W6859074459","https://openalex.org/W6859967938","https://openalex.org/W6861718640","https://openalex.org/W6869283970","https://openalex.org/W6872496228"],"related_works":["https://openalex.org/W2055243143","https://openalex.org/W1986418932","https://openalex.org/W2357796999","https://openalex.org/W4321636575","https://openalex.org/W2741131631","https://openalex.org/W4324372666","https://openalex.org/W4225706866","https://openalex.org/W2914646191","https://openalex.org/W3023564924","https://openalex.org/W2942586735"],"abstract_inverted_index":{"Large":[0],"language":[1],"models":[2,209],"(LLMs)":[3],"have":[4],"shown":[5],"great":[6],"success":[7],"in":[8,43,119],"content":[9],"generation":[10],"and":[11,32,65,123,128,136,154,162,171,184,188,200,223],"intelligent":[12,13],"decision":[14],"making":[15],"for":[16],"IoT":[17],"systems.":[18],"Traditionally,":[19],"LLMs":[20,86,112,132],"are":[21,51,59,164],"deployed":[22],"on":[23,96,113,157,210],"the":[24,48,97,108,197,205,235],"cloud,":[25],"incurring":[26],"prolonged":[27],"latency,":[28],"high":[29,134],"bandwidth":[30,169],"costs,":[31],"privacy":[33],"concerns.":[34],"More":[35],"recently,":[36],"edge":[37,49,57,81,116,121],"computing":[38,117],"has":[39],"been":[40],"considered":[41],"promising":[42],"addressing":[44],"such":[45,73],"concerns":[46],"because":[47],"devices":[50,58,122],"closer":[52],"to":[53,82,110,130,146,195,219],"data":[54],"sources.":[55],"However,":[56],"cursed":[60],"by":[61,76],"their":[62],"limited":[63],"resources":[64,127],"can":[66],"hardly":[67],"afford":[68],"LLMs.":[69],"Existing":[70],"studies":[71],"address":[72],"a":[74,114,143,148,211],"limitation":[75],"offloading":[77],"heavy":[78],"workloads":[79],"from":[80],"cloud":[83,99,124],"or":[84,100],"compressing":[85],"via":[87],"model":[88,172,185],"quantization.":[89],"These":[90],"methods":[91],"either":[92],"still":[93],"rely":[94],"heavily":[95],"remote":[98],"suffer":[101],"substantial":[102],"accuracy":[103,138],"loss.":[104,139],"This":[105],"work":[106],"is":[107],"first":[109],"deploy":[111,155],"collaborative":[115],"environment,":[118],"which":[120],"servers":[125],"share":[126],"collaborate":[129],"infer":[131],"with":[133],"efficiency":[135],"no":[137],"We":[140],"design":[141,189],"EdgeShard,":[142],"novel":[144],"approach":[145],"partition":[147,161,186],"computation-intensive":[149],"LLM":[150],"into":[151],"affordable":[152],"shards":[153],"them":[156],"distributed":[158],"devices.":[159],"The":[160],"distribution":[163],"nontrivial,":[165],"considering":[166],"device":[167,182],"heterogeneity,":[168],"limitations,":[170],"complexity.":[173],"To":[174],"this":[175],"end,":[176],"we":[177],"formulate":[178],"an":[179,190],"adaptive":[180],"joint":[181],"selection":[183],"problem":[187],"efficient":[191],"dynamic":[192],"programming":[193],"algorithm":[194],"optimize":[196],"inference":[198],"latency":[199,221],"throughput.":[201],"Extensive":[202],"experiments":[203],"of":[204],"popular":[206],"Llama2":[207],"serial":[208],"real-world":[212],"testbed":[213],"reveal":[214],"that":[215],"EdgeShard":[216],"achieves":[217],"up":[218],"50%":[220],"reduction":[222],"<inline-formula":[224],"xmlns:mml=\"http://www.w3.org/1998/Math/MathML\"":[225],"xmlns:xlink=\"http://www.w3.org/1999/xlink\">":[226],"<tex-math":[227],"notation=\"LaTeX\">$2":[228],"\\times":[229],"$":[230],"</tex-math></inline-formula>":[231],"throughput":[232],"improvement":[233],"over":[234],"state-of-the-art.":[236]},"counts_by_year":[{"year":2026,"cited_by_count":22},{"year":2025,"cited_by_count":94},{"year":2024,"cited_by_count":4}],"updated_date":"2026-05-09T13:55:54.758798","created_date":"2025-10-10T00:00:00"}
