{"id":"https://openalex.org/W4405717632","doi":"https://doi.org/10.1109/tmc.2024.3513457","title":"EdgeLLM: Fast On-Device LLM Inference With Speculative Decoding","display_name":"EdgeLLM: Fast On-Device LLM Inference With Speculative Decoding","publication_year":2024,"publication_date":"2024-12-23","ids":{"openalex":"https://openalex.org/W4405717632","doi":"https://doi.org/10.1109/tmc.2024.3513457"},"language":"en","primary_location":{"id":"doi:10.1109/tmc.2024.3513457","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tmc.2024.3513457","pdf_url":null,"source":{"id":"https://openalex.org/S69141925","display_name":"IEEE Transactions on Mobile Computing","issn_l":"1536-1233","issn":["1536-1233","1558-0660","2161-9875"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310320439","host_organization_name":"IEEE Computer Society","host_organization_lineage":["https://openalex.org/P4310320439","https://openalex.org/P4310319808"],"host_organization_lineage_names":["IEEE Computer Society","Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Mobile Computing","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5034382142","display_name":"Daliang Xu","orcid":"https://orcid.org/0000-0002-6775-0688"},"institutions":[{"id":"https://openalex.org/I20231570","display_name":"Peking University","ror":"https://ror.org/02v51f717","country_code":"CN","type":"education","lineage":["https://openalex.org/I20231570"]},{"id":"https://openalex.org/I4210128818","display_name":"Institute of Software","ror":"https://ror.org/033dfsn42","country_code":"CN","type":"facility","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210128818"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Daliang Xu","raw_affiliation_strings":["Key Laboratory of High Confidence Software Technologies, Ministry of Education, Peking University, Beijing, China","Key Laboratory of High Confidence Software Technologies, Ministry of Education; School of Computer Science, Peking University, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Key Laboratory of High Confidence Software Technologies, Ministry of Education, Peking University, Beijing, China","institution_ids":["https://openalex.org/I20231570"]},{"raw_affiliation_string":"Key Laboratory of High Confidence Software Technologies, Ministry of Education; School of Computer Science, Peking University, Beijing, China","institution_ids":["https://openalex.org/I4210128818","https://openalex.org/I20231570"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101293258","display_name":"Wangsong Yin","orcid":null},"institutions":[{"id":"https://openalex.org/I20231570","display_name":"Peking University","ror":"https://ror.org/02v51f717","country_code":"CN","type":"education","lineage":["https://openalex.org/I20231570"]},{"id":"https://openalex.org/I4210128818","display_name":"Institute of Software","ror":"https://ror.org/033dfsn42","country_code":"CN","type":"facility","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210128818"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Wangsong Yin","raw_affiliation_strings":["Key Laboratory of High Confidence Software Technologies, Ministry of Education, Peking University, Beijing, China","Key Laboratory of High Confidence Software Technologies, Ministry of Education; School of Computer Science, Peking University, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Key Laboratory of High Confidence Software Technologies, Ministry of Education, Peking University, Beijing, China","institution_ids":["https://openalex.org/I20231570"]},{"raw_affiliation_string":"Key Laboratory of High Confidence Software Technologies, Ministry of Education; School of Computer Science, Peking University, Beijing, China","institution_ids":["https://openalex.org/I4210128818","https://openalex.org/I20231570"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Hao Zhang","orcid":"https://orcid.org/0009-0007-1107-4688"},"institutions":[{"id":"https://openalex.org/I139759216","display_name":"Beijing University of Posts and Telecommunications","ror":"https://ror.org/04w9fbh59","country_code":"CN","type":"education","lineage":["https://openalex.org/I139759216"]},{"id":"https://openalex.org/I21193070","display_name":"Beijing Jiaotong University","ror":"https://ror.org/01yj56c84","country_code":"CN","type":"education","lineage":["https://openalex.org/I21193070"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Hao Zhang","raw_affiliation_strings":["State Key Laboratory of Networking and Switching Technology, Beijing University of Posts and Telecommunications, Beijing, China","School of Software Engineering, Beijing Jiaotong University, Beijing, China"],"affiliations":[{"raw_affiliation_string":"State Key Laboratory of Networking and Switching Technology, Beijing University of Posts and Telecommunications, Beijing, China","institution_ids":["https://openalex.org/I139759216"]},{"raw_affiliation_string":"School of Software Engineering, Beijing Jiaotong University, Beijing, China","institution_ids":["https://openalex.org/I21193070"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101882004","display_name":"Xin Jin","orcid":"https://orcid.org/0000-0001-8741-5847"},"institutions":[{"id":"https://openalex.org/I20231570","display_name":"Peking University","ror":"https://ror.org/02v51f717","country_code":"CN","type":"education","lineage":["https://openalex.org/I20231570"]},{"id":"https://openalex.org/I4210128818","display_name":"Institute of Software","ror":"https://ror.org/033dfsn42","country_code":"CN","type":"facility","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210128818"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xin Jin","raw_affiliation_strings":["Key Laboratory of High Confidence Software Technologies, Ministry of Education, Peking University, Beijing, China","Key Laboratory of High Confidence Software Technologies, Ministry of Education; School of Computer Science, Peking University, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Key Laboratory of High Confidence Software Technologies, Ministry of Education, Peking University, Beijing, China","institution_ids":["https://openalex.org/I20231570"]},{"raw_affiliation_string":"Key Laboratory of High Confidence Software Technologies, Ministry of Education; School of Computer Science, Peking University, Beijing, China","institution_ids":["https://openalex.org/I4210128818","https://openalex.org/I20231570"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Ying Zhang","orcid":"https://orcid.org/0009-0009-6924-2319"},"institutions":[{"id":"https://openalex.org/I20231570","display_name":"Peking University","ror":"https://ror.org/02v51f717","country_code":"CN","type":"education","lineage":["https://openalex.org/I20231570"]},{"id":"https://openalex.org/I4210128818","display_name":"Institute of Software","ror":"https://ror.org/033dfsn42","country_code":"CN","type":"facility","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210128818"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Ying Zhang","raw_affiliation_strings":["Key Laboratory of High Confidence Software Technologies, Ministry of Education, Peking University, Beijing, China","Key Laboratory of High Confidence Software Technologies, Ministry of Education; School of Computer Science, Peking University, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Key Laboratory of High Confidence Software Technologies, Ministry of Education, Peking University, Beijing, China","institution_ids":["https://openalex.org/I20231570"]},{"raw_affiliation_string":"Key Laboratory of High Confidence Software Technologies, Ministry of Education; School of Computer Science, Peking University, Beijing, China","institution_ids":["https://openalex.org/I4210128818","https://openalex.org/I20231570"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101243237","display_name":"Shiyun Wei","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Shiyun Wei","raw_affiliation_strings":["Zhongguancun Laboratory, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Zhongguancun Laboratory, Beijing, China","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5089690212","display_name":"Mengwei Xu","orcid":"https://orcid.org/0000-0001-6271-6993"},"institutions":[{"id":"https://openalex.org/I139759216","display_name":"Beijing University of Posts and Telecommunications","ror":"https://ror.org/04w9fbh59","country_code":"CN","type":"education","lineage":["https://openalex.org/I139759216"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Mengwei Xu","raw_affiliation_strings":["State Key Laboratory of Networking and Switching Technology, Beijing University of Posts and Telecommunications, Beijing, China"],"affiliations":[{"raw_affiliation_string":"State Key Laboratory of Networking and Switching Technology, Beijing University of Posts and Telecommunications, Beijing, China","institution_ids":["https://openalex.org/I139759216"]}]},{"author_position":"last","author":{"id":null,"display_name":"Xuanzhe Liu","orcid":"https://orcid.org/0000-0002-7908-8484"},"institutions":[{"id":"https://openalex.org/I20231570","display_name":"Peking University","ror":"https://ror.org/02v51f717","country_code":"CN","type":"education","lineage":["https://openalex.org/I20231570"]},{"id":"https://openalex.org/I4210128818","display_name":"Institute of Software","ror":"https://ror.org/033dfsn42","country_code":"CN","type":"facility","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210128818"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xuanzhe Liu","raw_affiliation_strings":["Key Laboratory of High Confidence Software Technologies, Ministry of Education, Peking University, Beijing, China","Key Laboratory of High Confidence Software Technologies, Ministry of Education; School of Computer Science, Peking University, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Key Laboratory of High Confidence Software Technologies, Ministry of Education, Peking University, Beijing, China","institution_ids":["https://openalex.org/I20231570"]},{"raw_affiliation_string":"Key Laboratory of High Confidence Software Technologies, Ministry of Education; School of Computer Science, Peking University, Beijing, China","institution_ids":["https://openalex.org/I4210128818","https://openalex.org/I20231570"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":8,"corresponding_author_ids":["https://openalex.org/A5034382142"],"corresponding_institution_ids":["https://openalex.org/I20231570","https://openalex.org/I4210128818"],"apc_list":null,"apc_paid":null,"fwci":6.1471,"has_fulltext":false,"cited_by_count":18,"citation_normalized_percentile":{"value":0.96937917,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":99,"max":100},"biblio":{"volume":"24","issue":"4","first_page":"3256","last_page":"3273"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10320","display_name":"Neural Networks and Applications","score":0.8848000168800354,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10320","display_name":"Neural Networks and Applications","score":0.8848000168800354,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11181","display_name":"Advanced Data Storage Technologies","score":0.8623999953269958,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12808","display_name":"Ferroelectric and Negative Capacitance Devices","score":0.8355000019073486,"subfield":{"id":"https://openalex.org/subfields/2208","display_name":"Electrical and Electronic Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8424695730209351},{"id":"https://openalex.org/keywords/decoding-methods","display_name":"Decoding methods","score":0.6750704646110535},{"id":"https://openalex.org/keywords/inference","display_name":"Inference","score":0.5336872339248657},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.34731054306030273},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.19670018553733826},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.15364301204681396}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8424695730209351},{"id":"https://openalex.org/C57273362","wikidata":"https://www.wikidata.org/wiki/Q576722","display_name":"Decoding methods","level":2,"score":0.6750704646110535},{"id":"https://openalex.org/C2776214188","wikidata":"https://www.wikidata.org/wiki/Q408386","display_name":"Inference","level":2,"score":0.5336872339248657},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.34731054306030273},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.19670018553733826},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.15364301204681396}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/tmc.2024.3513457","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tmc.2024.3513457","pdf_url":null,"source":{"id":"https://openalex.org/S69141925","display_name":"IEEE Transactions on Mobile Computing","issn_l":"1536-1233","issn":["1536-1233","1558-0660","2161-9875"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310320439","host_organization_name":"IEEE Computer Society","host_organization_lineage":["https://openalex.org/P4310320439","https://openalex.org/P4310319808"],"host_organization_lineage_names":["IEEE Computer Society","Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Mobile Computing","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G4497957390","display_name":null,"funder_award_id":"62172008","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G8414340155","display_name":null,"funder_award_id":"62325201","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":63,"referenced_works":["https://openalex.org/W2889402930","https://openalex.org/W2897268228","https://openalex.org/W2907337720","https://openalex.org/W2962814013","https://openalex.org/W2963748441","https://openalex.org/W2969388332","https://openalex.org/W2996874060","https://openalex.org/W3012479151","https://openalex.org/W3147954149","https://openalex.org/W3155457266","https://openalex.org/W3159727696","https://openalex.org/W3163966209","https://openalex.org/W3169483174","https://openalex.org/W3180037928","https://openalex.org/W3201174429","https://openalex.org/W3211149853","https://openalex.org/W4214951654","https://openalex.org/W4311431702","https://openalex.org/W4312060029","https://openalex.org/W4312933868","https://openalex.org/W4318541554","https://openalex.org/W4385245566","https://openalex.org/W4399140776","https://openalex.org/W4401042997","https://openalex.org/W4402671659","https://openalex.org/W4404401018","https://openalex.org/W6691646088","https://openalex.org/W6713645886","https://openalex.org/W6727099177","https://openalex.org/W6737294268","https://openalex.org/W6769311223","https://openalex.org/W6769627184","https://openalex.org/W6772383348","https://openalex.org/W6773815586","https://openalex.org/W6774062504","https://openalex.org/W6778883912","https://openalex.org/W6782879696","https://openalex.org/W6785197036","https://openalex.org/W6789500345","https://openalex.org/W6789645339","https://openalex.org/W6796581206","https://openalex.org/W6803096969","https://openalex.org/W6810702803","https://openalex.org/W6838461927","https://openalex.org/W6838633097","https://openalex.org/W6839193947","https://openalex.org/W6843409372","https://openalex.org/W6846164622","https://openalex.org/W6847386241","https://openalex.org/W6847478871","https://openalex.org/W6848194691","https://openalex.org/W6849530321","https://openalex.org/W6850162387","https://openalex.org/W6850625674","https://openalex.org/W6853864807","https://openalex.org/W6855616086","https://openalex.org/W6857551316","https://openalex.org/W6860743466","https://openalex.org/W6862776294","https://openalex.org/W6870251909","https://openalex.org/W6873385310","https://openalex.org/W6873691999","https://openalex.org/W6874869863"],"related_works":["https://openalex.org/W4391375266","https://openalex.org/W2899084033","https://openalex.org/W2748952813","https://openalex.org/W2390279801","https://openalex.org/W4391913857","https://openalex.org/W2358668433","https://openalex.org/W4396701345","https://openalex.org/W2376932109","https://openalex.org/W2001405890","https://openalex.org/W4396696052"],"abstract_inverted_index":{"Generative":[0],"tasks,":[1],"such":[2],"as":[3],"text":[4],"generation":[5,200],"and":[6,104,115,141],"question":[7],"answering,":[8],"are":[9],"essential":[10],"for":[11,63],"mobile":[12],"applications.":[13],"Given":[14],"their":[15,126],"inherent":[16],"privacy":[17],"sensitivity,":[18],"executing":[19],"them":[20,144],"on":[21,34],"devices":[22],"is":[23,75,203],"demanded.":[24],"Nowadays,":[25],"the":[26,35,41,47,68,119,131,138,159,163,174,188],"execution":[27],"of":[28,49,99,121,134],"these":[29,50],"generative":[30],"tasks":[31],"heavily":[32],"relies":[33],"Large":[36],"Language":[37],"Models":[38],"(LLMs).":[39],"However,":[40],"scarce":[42],"device":[43],"memory":[44,70],"severely":[45],"hinders":[46],"scalability":[48],"models.":[51],"We":[52],"present":[53],"<monospace":[54,72,90,108,176,194],"xmlns:mml=\"http://www.w3.org/1998/Math/MathML\"":[55,73,91,109,177,195],"xmlns:xlink=\"http://www.w3.org/1999/xlink\">EdgeLLM</monospace>,":[56],"an":[57,167],"efficient":[58],"on-device":[59],"LLM":[60,165,185],"inference":[61],"system":[62],"models":[64],"whose":[65],"sizes":[66],"exceed":[67],"device's":[69],"capacity.":[71],"xmlns:xlink=\"http://www.w3.org/1999/xlink\">EdgeLLM</monospace>":[74,92,110,178,196],"built":[76],"atop":[77],"speculative":[78],"decoding,":[79],"which":[80,202],"delegates":[81],"most":[82],"tokens":[83,182],"to":[84,117,125,129,137,142,205],"a":[85,101,152],"smaller,":[86],"memory-resident":[87],"(draft)":[88],"LLM.":[89],"integrates":[93],"three":[94],"novel":[95],"techniques:":[96],"(1)":[97],"Instead":[98],"generating":[100,181],"fixed":[102],"width":[103],"depth":[105],"token":[106,199],"tree,":[107],"proposes":[111,179],"compute-efficient":[112],"branch":[113,140],"navigation":[114],"verification":[116,160,186],"pace":[118],"progress":[120],"different":[122],"branches":[123],"according":[124],"accepted":[127],"probability":[128],"prevent":[130],"wasteful":[132],"allocation":[133],"computing":[135],"resources":[136],"wrong":[139],"verify":[143],"all":[145],"at":[146],"once":[147],"efficiently.":[148],"(2)":[149],"It":[150],"uses":[151],"self-adaptive":[153],"fallback":[154],"strategy":[155],"that":[156],"promptly":[157],"initiates":[158],"process":[161],"when":[162],"smaller":[164],"generates":[166],"incorrect":[168],"token.":[169],"(3)":[170],"To":[171],"not":[172],"block":[173],"generation,":[175],"speculatively":[180],"during":[183],"large":[184],"with":[187],"compute-IO":[189],"pipeline.":[190],"Through":[191],"extensive":[192],"experiments,":[193],"exhibits":[197],"impressive":[198],"speed":[201],"up":[204],"9.3\u00d7":[206],"faster":[207],"than":[208],"existing":[209],"engines.":[210]},"counts_by_year":[{"year":2026,"cited_by_count":3},{"year":2025,"cited_by_count":15}],"updated_date":"2026-04-09T08:11:56.329763","created_date":"2025-10-10T00:00:00"}
