{"id":"https://openalex.org/W4399534541","doi":"https://doi.org/10.1145/3662006.3662067","title":"Hybrid SLM and LLM for Edge-Cloud Collaborative Inference","display_name":"Hybrid SLM and LLM for Edge-Cloud Collaborative Inference","publication_year":2024,"publication_date":"2024-06-03","ids":{"openalex":"https://openalex.org/W4399534541","doi":"https://doi.org/10.1145/3662006.3662067"},"language":"en","primary_location":{"id":"doi:10.1145/3662006.3662067","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3662006.3662067","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3662006.3662067","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the Workshop on Edge and Mobile Foundation Models","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://dl.acm.org/doi/pdf/10.1145/3662006.3662067","any_repository_has_fulltext":null},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5093259788","display_name":"Zixu Hao","orcid":"https://orcid.org/0009-0007-1671-1367"},"institutions":[{"id":"https://openalex.org/I4210164937","display_name":"Microsoft Research (United Kingdom)","ror":"https://ror.org/05k87vq12","country_code":"GB","type":"company","lineage":["https://openalex.org/I1290206253","https://openalex.org/I4210164937"]},{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN","GB"],"is_corresponding":true,"raw_author_name":"Zixu Hao","raw_affiliation_strings":["Tsinghua University, Microsoft Research"],"raw_orcid":"https://orcid.org/0009-0007-1671-1367","affiliations":[{"raw_affiliation_string":"Tsinghua University, Microsoft Research","institution_ids":["https://openalex.org/I4210164937","https://openalex.org/I99065089"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5070156365","display_name":"Huiqiang Jiang","orcid":"https://orcid.org/0000-0002-1327-4882"},"institutions":[{"id":"https://openalex.org/I4210164937","display_name":"Microsoft Research (United Kingdom)","ror":"https://ror.org/05k87vq12","country_code":"GB","type":"company","lineage":["https://openalex.org/I1290206253","https://openalex.org/I4210164937"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Huiqiang Jiang","raw_affiliation_strings":["Microsoft Research"],"raw_orcid":"https://orcid.org/0000-0002-1327-4882","affiliations":[{"raw_affiliation_string":"Microsoft Research","institution_ids":["https://openalex.org/I4210164937"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101989690","display_name":"Shiqi Jiang","orcid":"https://orcid.org/0000-0002-4685-9633"},"institutions":[{"id":"https://openalex.org/I4210164937","display_name":"Microsoft Research (United Kingdom)","ror":"https://ror.org/05k87vq12","country_code":"GB","type":"company","lineage":["https://openalex.org/I1290206253","https://openalex.org/I4210164937"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Shiqi Jiang","raw_affiliation_strings":["Microsoft Research"],"raw_orcid":"https://orcid.org/0000-0002-4685-9633","affiliations":[{"raw_affiliation_string":"Microsoft Research","institution_ids":["https://openalex.org/I4210164937"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5015419107","display_name":"Ju Ren","orcid":"https://orcid.org/0000-0003-2782-183X"},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Ju Ren","raw_affiliation_strings":["Tsinghua University"],"raw_orcid":"https://orcid.org/0000-0003-2782-183X","affiliations":[{"raw_affiliation_string":"Tsinghua University","institution_ids":["https://openalex.org/I99065089"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5101534161","display_name":"Ting Cao","orcid":"https://orcid.org/0000-0002-9107-013X"},"institutions":[{"id":"https://openalex.org/I4210164937","display_name":"Microsoft Research (United Kingdom)","ror":"https://ror.org/05k87vq12","country_code":"GB","type":"company","lineage":["https://openalex.org/I1290206253","https://openalex.org/I4210164937"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Ting Cao","raw_affiliation_strings":["Microsoft Research"],"raw_orcid":"https://orcid.org/0000-0002-9107-013X","affiliations":[{"raw_affiliation_string":"Microsoft Research","institution_ids":["https://openalex.org/I4210164937"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5093259788"],"corresponding_institution_ids":["https://openalex.org/I4210164937","https://openalex.org/I99065089"],"apc_list":null,"apc_paid":null,"fwci":13.4558,"has_fulltext":false,"cited_by_count":40,"citation_normalized_percentile":{"value":0.99178689,"is_in_top_1_percent":true,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":94,"max":100},"biblio":{"volume":null,"issue":null,"first_page":"36","last_page":"41"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10273","display_name":"IoT and Edge/Fog Computing","score":0.991100013256073,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10273","display_name":"IoT and Edge/Fog Computing","score":0.991100013256073,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10764","display_name":"Privacy-Preserving Technologies in Data","score":0.9714999794960022,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11614","display_name":"Cloud Data Security Solutions","score":0.9588000178337097,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/cloud-computing","display_name":"Cloud computing","score":0.7712225317955017},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6326397061347961},{"id":"https://openalex.org/keywords/enhanced-data-rates-for-gsm-evolution","display_name":"Enhanced Data Rates for GSM Evolution","score":0.5965805053710938},{"id":"https://openalex.org/keywords/inference","display_name":"Inference","score":0.5761577486991882},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.19975125789642334},{"id":"https://openalex.org/keywords/operating-system","display_name":"Operating system","score":0.16702991724014282}],"concepts":[{"id":"https://openalex.org/C79974875","wikidata":"https://www.wikidata.org/wiki/Q483639","display_name":"Cloud computing","level":2,"score":0.7712225317955017},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6326397061347961},{"id":"https://openalex.org/C162307627","wikidata":"https://www.wikidata.org/wiki/Q204833","display_name":"Enhanced Data Rates for GSM Evolution","level":2,"score":0.5965805053710938},{"id":"https://openalex.org/C2776214188","wikidata":"https://www.wikidata.org/wiki/Q408386","display_name":"Inference","level":2,"score":0.5761577486991882},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.19975125789642334},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.16702991724014282}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3662006.3662067","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3662006.3662067","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3662006.3662067","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the Workshop on Edge and Mobile Foundation Models","raw_type":"proceedings-article"}],"best_oa_location":{"id":"doi:10.1145/3662006.3662067","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3662006.3662067","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3662006.3662067","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the Workshop on Edge and Mobile Foundation Models","raw_type":"proceedings-article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":true,"grobid_xml":false},"content_urls":{"pdf":"https://content.openalex.org/works/W4399534541.pdf"},"referenced_works_count":13,"referenced_works":["https://openalex.org/W2888482885","https://openalex.org/W2980856918","https://openalex.org/W3043914740","https://openalex.org/W3049640275","https://openalex.org/W3094144275","https://openalex.org/W3102767875","https://openalex.org/W3120615301","https://openalex.org/W3156189202","https://openalex.org/W3200886011","https://openalex.org/W3213941755","https://openalex.org/W4317927938","https://openalex.org/W4388874804","https://openalex.org/W4391876619"],"related_works":["https://openalex.org/W4391375266","https://openalex.org/W2748952813","https://openalex.org/W4244478748","https://openalex.org/W4223488648","https://openalex.org/W2134969820","https://openalex.org/W2251605416","https://openalex.org/W2560439919","https://openalex.org/W4389340727","https://openalex.org/W3150465815","https://openalex.org/W1997222214"],"abstract_inverted_index":{"Edge-Cloud":[0,24,60],"collaboration":[1,25,61],"for":[2,62],"deep":[3],"learning":[4],"inference":[5,13],"has":[6],"been":[7],"actively":[8],"studied,":[9],"to":[10,94,108],"enhance":[11],"the":[12,37,49,74,81],"performance":[14],"by":[15],"leveraging":[16],"both":[17],"Edge":[18,75],"and":[19,48],"Cloud":[20],"resources.":[21],"However,":[22],"traditional":[23],"based":[26],"on":[27,73,112],"model":[28],"partitioning":[29],"or":[30],"confidence":[31],"score":[32],"are":[33],"not":[34],"suitable":[35],"in":[36],"LLM":[38,87,106],"(large":[39],"language":[40,67],"models)":[41],"era,":[42],"because":[43],"of":[44],"its":[45],"autoregressive":[46],"generation":[47],"generality":[50],"across":[51],"diverse":[52],"tasks.":[53],"This":[54],"paper":[55],"proposes":[56],"a":[57,90],"dynamic":[58],"token-level":[59,78],"LLMs.":[63],"A":[64],"SLM":[65],"(small":[66],"model)":[68],"such":[69],"as":[70],"TinyLlama":[71],"resides":[72],"devices,":[76],"through":[77],"interaction":[79],"with":[80,89],"Cloud-side":[82],"LLMs":[83],"during":[84],"inference,":[85],"approaching":[86],"quality":[88,111],"controllable":[91],"cost":[92,107],"similar":[93],"SLM.":[95],"Evaluation":[96],"results":[97],"show":[98],"that":[99],"our":[100],"method":[101],"can":[102],"only":[103],"use":[104],"25.8%":[105],"achieve":[109],"LLM-comparable":[110],"GSM8K":[113],"task.":[114]},"counts_by_year":[{"year":2026,"cited_by_count":7},{"year":2025,"cited_by_count":31},{"year":2024,"cited_by_count":2}],"updated_date":"2026-03-27T05:58:40.876381","created_date":"2025-10-10T00:00:00"}
