{"id":"https://openalex.org/W4404057314","doi":"https://doi.org/10.1109/socc62300.2024.10737825","title":"Efficient Deployment of Large Language Model across Cloud-Device Systems","display_name":"Efficient Deployment of Large Language Model across Cloud-Device Systems","publication_year":2024,"publication_date":"2024-09-16","ids":{"openalex":"https://openalex.org/W4404057314","doi":"https://doi.org/10.1109/socc62300.2024.10737825"},"language":"en","primary_location":{"id":"doi:10.1109/socc62300.2024.10737825","is_oa":false,"landing_page_url":"https://doi.org/10.1109/socc62300.2024.10737825","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2024 IEEE 37th International System-on-Chip Conference (SOCC)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5052071646","display_name":"Fan Yang","orcid":"https://orcid.org/0000-0003-1842-1084"},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Fan Yang","raw_affiliation_strings":["Tsinghua University"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Tsinghua University","institution_ids":["https://openalex.org/I99065089"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100769604","display_name":"Zehao Wang","orcid":"https://orcid.org/0009-0007-6159-7727"},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Zehao Wang","raw_affiliation_strings":["Tsinghua University"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Tsinghua University","institution_ids":["https://openalex.org/I99065089"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5038968284","display_name":"Haoyu Zhang","orcid":"https://orcid.org/0009-0003-3889-8688"},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Haoyu Zhang","raw_affiliation_strings":["Tsinghua University"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Tsinghua University","institution_ids":["https://openalex.org/I99065089"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5103138440","display_name":"Zhenhua Zhu","orcid":"https://orcid.org/0009-0007-9259-7180"},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Zhenhua Zhu","raw_affiliation_strings":["Tsinghua University"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Tsinghua University","institution_ids":["https://openalex.org/I99065089"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100832142","display_name":"Xinhao Yang","orcid":null},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xinhao Yang","raw_affiliation_strings":["Tsinghua University"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Tsinghua University","institution_ids":["https://openalex.org/I99065089"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5015946486","display_name":"Guohao Dai","orcid":"https://orcid.org/0000-0003-0849-3252"},"institutions":[{"id":"https://openalex.org/I183067930","display_name":"Shanghai Jiao Tong University","ror":"https://ror.org/0220qvk04","country_code":"CN","type":"education","lineage":["https://openalex.org/I183067930"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Guohao Dai","raw_affiliation_strings":["Shanghai Jiao Tong University"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Shanghai Jiao Tong University","institution_ids":["https://openalex.org/I183067930"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5100445061","display_name":"Yu Wang","orcid":"https://orcid.org/0000-0001-6108-5157"},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yu Wang","raw_affiliation_strings":["Tsinghua University"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Tsinghua University","institution_ids":["https://openalex.org/I99065089"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":7,"corresponding_author_ids":["https://openalex.org/A5052071646"],"corresponding_institution_ids":["https://openalex.org/I99065089"],"apc_list":null,"apc_paid":null,"fwci":2.6032,"has_fulltext":false,"cited_by_count":8,"citation_normalized_percentile":{"value":0.91235822,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":96,"max":99},"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"6"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T13062","display_name":"Cognitive Computing and Networks","score":0.8475000262260437,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T13062","display_name":"Cognitive Computing and Networks","score":0.8475000262260437,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T13382","display_name":"Robotics and Automated Systems","score":0.8226000070571899,"subfield":{"id":"https://openalex.org/subfields/2207","display_name":"Control and Systems Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T14347","display_name":"Big Data and Digital Economy","score":0.7394000291824341,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/software-deployment","display_name":"Software deployment","score":0.8232753872871399},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7276340126991272},{"id":"https://openalex.org/keywords/cloud-computing","display_name":"Cloud computing","score":0.6829709410667419},{"id":"https://openalex.org/keywords/software-engineering","display_name":"Software engineering","score":0.21104440093040466},{"id":"https://openalex.org/keywords/operating-system","display_name":"Operating system","score":0.17690005898475647}],"concepts":[{"id":"https://openalex.org/C105339364","wikidata":"https://www.wikidata.org/wiki/Q2297740","display_name":"Software deployment","level":2,"score":0.8232753872871399},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7276340126991272},{"id":"https://openalex.org/C79974875","wikidata":"https://www.wikidata.org/wiki/Q483639","display_name":"Cloud computing","level":2,"score":0.6829709410667419},{"id":"https://openalex.org/C115903868","wikidata":"https://www.wikidata.org/wiki/Q80993","display_name":"Software engineering","level":1,"score":0.21104440093040466},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.17690005898475647}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/socc62300.2024.10737825","is_oa":false,"landing_page_url":"https://doi.org/10.1109/socc62300.2024.10737825","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2024 IEEE 37th International System-on-Chip Conference (SOCC)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[{"id":"https://openalex.org/F4320314786","display_name":"Xilinx","ror":"https://ror.org/01rb7bk56"},{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":24,"referenced_works":["https://openalex.org/W2924304610","https://openalex.org/W3030801383","https://openalex.org/W3049640275","https://openalex.org/W3093987685","https://openalex.org/W3102767875","https://openalex.org/W3110777925","https://openalex.org/W3172419167","https://openalex.org/W4285217780","https://openalex.org/W4292779060","https://openalex.org/W4315705963","https://openalex.org/W4377971462","https://openalex.org/W4378464713","https://openalex.org/W4384918448","https://openalex.org/W4385245566","https://openalex.org/W4390486238","https://openalex.org/W4395065783","https://openalex.org/W4398841363","https://openalex.org/W4401508667","https://openalex.org/W6778883912","https://openalex.org/W6848604973","https://openalex.org/W6852818246","https://openalex.org/W6854866820","https://openalex.org/W6865347877","https://openalex.org/W6868260766"],"related_works":["https://openalex.org/W4391375266","https://openalex.org/W2899084033","https://openalex.org/W2748952813","https://openalex.org/W2770234245","https://openalex.org/W96612179","https://openalex.org/W4229499248","https://openalex.org/W2566006169","https://openalex.org/W1567818861","https://openalex.org/W2987774938","https://openalex.org/W4256492088"],"abstract_inverted_index":{"The":[0,58],"capabilities":[1],"of":[2,20,29,204],"large":[3],"language":[4],"models":[5],"(LLMs)":[6],"in":[7,35],"text":[8],"comprehension":[9],"and":[10,22,47,70,86,115,137,156,176,183,198],"generation":[11],"are":[12],"advancing":[13],"artificial":[14],"intelligence.":[15],"However,":[16,77],"the":[17,26,36,63,68,78,119,124,129,146,193],"growing":[18],"number":[19],"parameters":[21],"computational":[23],"demands":[24],"challenge":[25],"efficient":[27,177],"deployment":[28,104,178],"inference":[30,103,184,188,196],"services.":[31],"High-performance":[32],"GPU":[33,157],"clusters":[34],"cloud":[37,69],"can":[38],"meet":[39,54],"these":[40,98],"requirements":[41],"but":[42],"incur":[43],"high":[44],"service":[45],"costs":[46],"network":[48],"stability":[49],"issues,":[50],"which":[51],"struggle":[52],"to":[53,73,160,208],"service-level":[55],"agreements":[56],"(SLAs).":[57],"\u201ccloud-device":[59],"collaboration\u201d":[60],"approach":[61],"leverages":[62],"heterogeneous":[64],"hardware":[65,113,147,181],"on":[66,105],"both":[67],"device":[71],"sides":[72],"satisfy":[74],"SlAs":[75],"efficiently.":[76],"varying":[79],"operational":[80,154],"intensity":[81,155],"among":[82],"different":[83,134],"LLM":[84,102,125,170],"operators":[85],"their":[87],"dynamic":[88],"nature":[89],"complicate":[90],"load":[91],"scheduling":[92,111,120,143],"for":[93,173,195],"cloud-device":[94,106,169],"systems.":[95],"To":[96],"address":[97],"challenges,":[99],"we":[100,122,149,166],"optimize":[101],"systems":[107],"through":[108],"three":[109],"aspects:":[110],"algorithm,":[112,121],"modeling,":[114,148],"compilation":[116],"deployment.":[117],"For":[118,145],"analyze":[123],"computation":[126],"network,":[127],"evaluate":[128],"computation-to-memory":[130],"access":[131],"ratio":[132],"under":[133],"sequence":[135],"lengths,":[136],"propose":[138],"a":[139,151,168],"greedy":[140],"algorithm-based":[141],"operator-level":[142],"strategy.":[144],"establish":[150],"relationship":[152],"between":[153],"resource":[158],"utilization":[159],"estimate":[161],"operator":[162],"running":[163],"time.":[164],"Finally,":[165],"designed":[167],"compiler":[171],"framework":[172,191],"quantitative":[174],"evaluation":[175],"across":[179],"various":[180],"combinations":[182],"tasks.":[185],"In":[186],"specific":[187],"scenarios,":[189],"our":[190],"satisfies":[192],"need":[194],"latency":[197],"achieves":[199],"an":[200],"average":[201],"cost":[202],"reduction":[203],"$20.7":[205],"\\%$":[206],"compared":[207],"cloud-side-only":[209],"inference.":[210]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":7}],"updated_date":"2026-05-06T08:25:59.206177","created_date":"2025-10-10T00:00:00"}
