{"id":"https://openalex.org/W4379390254","doi":"https://doi.org/10.1145/3555041.3589720","title":"SMILE: A Cost-Effective System for Serving Massive Pretrained Language Models in The Cloud","display_name":"SMILE: A Cost-Effective System for Serving Massive Pretrained Language Models in The Cloud","publication_year":2023,"publication_date":"2023-06-04","ids":{"openalex":"https://openalex.org/W4379390254","doi":"https://doi.org/10.1145/3555041.3589720"},"language":"en","primary_location":{"id":"doi:10.1145/3555041.3589720","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3555041.3589720","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Companion of the 2023 International Conference on Management of Data","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5100440609","display_name":"Jue Wang","orcid":"https://orcid.org/0000-0002-6712-1929"},"institutions":[{"id":"https://openalex.org/I76130692","display_name":"Zhejiang University","ror":"https://ror.org/00a2xv884","country_code":"CN","type":"education","lineage":["https://openalex.org/I76130692"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Jue Wang","raw_affiliation_strings":["Zhejiang University, Hangzhou, China"],"raw_orcid":"https://orcid.org/0000-0002-6712-1929","affiliations":[{"raw_affiliation_string":"Zhejiang University, Hangzhou, China","institution_ids":["https://openalex.org/I76130692"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100451997","display_name":"Ke Chen","orcid":"https://orcid.org/0000-0002-3062-0900"},"institutions":[{"id":"https://openalex.org/I76130692","display_name":"Zhejiang University","ror":"https://ror.org/00a2xv884","country_code":"CN","type":"education","lineage":["https://openalex.org/I76130692"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Ke Chen","raw_affiliation_strings":["Zhejiang University, Hangzhou, China"],"raw_orcid":"https://orcid.org/0000-0002-3062-0900","affiliations":[{"raw_affiliation_string":"Zhejiang University, Hangzhou, China","institution_ids":["https://openalex.org/I76130692"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5103017455","display_name":"Lidan Shou","orcid":"https://orcid.org/0000-0001-8062-8356"},"institutions":[{"id":"https://openalex.org/I76130692","display_name":"Zhejiang University","ror":"https://ror.org/00a2xv884","country_code":"CN","type":"education","lineage":["https://openalex.org/I76130692"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Lidan Shou","raw_affiliation_strings":["Zhejiang University, Hangzhou, China"],"raw_orcid":"https://orcid.org/0000-0001-8062-8356","affiliations":[{"raw_affiliation_string":"Zhejiang University, Hangzhou, China","institution_ids":["https://openalex.org/I76130692"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5103032435","display_name":"Dawei Jiang","orcid":"https://orcid.org/0009-0001-2807-1068"},"institutions":[{"id":"https://openalex.org/I76130692","display_name":"Zhejiang University","ror":"https://ror.org/00a2xv884","country_code":"CN","type":"education","lineage":["https://openalex.org/I76130692"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Dawei Jiang","raw_affiliation_strings":["Zhejiang University, Hangzhou, China"],"raw_orcid":"https://orcid.org/0009-0001-2807-1068","affiliations":[{"raw_affiliation_string":"Zhejiang University, Hangzhou, China","institution_ids":["https://openalex.org/I76130692"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5100389286","display_name":"Gang Chen","orcid":"https://orcid.org/0000-0002-7483-0045"},"institutions":[{"id":"https://openalex.org/I76130692","display_name":"Zhejiang University","ror":"https://ror.org/00a2xv884","country_code":"CN","type":"education","lineage":["https://openalex.org/I76130692"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Gang Chen","raw_affiliation_strings":["Zhejiang University, Hangzhou, China"],"raw_orcid":"https://orcid.org/0000-0002-7483-0045","affiliations":[{"raw_affiliation_string":"Zhejiang University, Hangzhou, China","institution_ids":["https://openalex.org/I76130692"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5100440609"],"corresponding_institution_ids":["https://openalex.org/I76130692"],"apc_list":null,"apc_paid":null,"fwci":0.1177,"has_fulltext":false,"cited_by_count":1,"citation_normalized_percentile":{"value":0.37858578,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":91,"max":95},"biblio":{"volume":null,"issue":null,"first_page":"135","last_page":"138"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.9993000030517578,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.9993000030517578,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12535","display_name":"Machine Learning and Data Classification","score":0.9948999881744385,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12072","display_name":"Machine Learning and Algorithms","score":0.9840999841690063,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8355069160461426},{"id":"https://openalex.org/keywords/cloud-computing","display_name":"Cloud computing","score":0.7772054672241211},{"id":"https://openalex.org/keywords/software-deployment","display_name":"Software deployment","score":0.5622739195823669},{"id":"https://openalex.org/keywords/distributed-computing","display_name":"Distributed computing","score":0.556521475315094},{"id":"https://openalex.org/keywords/server","display_name":"Server","score":0.5260170102119446},{"id":"https://openalex.org/keywords/personalization","display_name":"Personalization","score":0.5194467902183533},{"id":"https://openalex.org/keywords/resource","display_name":"Resource (disambiguation)","score":0.45290619134902954},{"id":"https://openalex.org/keywords/resource-allocation","display_name":"Resource allocation","score":0.4522358775138855},{"id":"https://openalex.org/keywords/variety","display_name":"Variety (cybernetics)","score":0.4422450363636017},{"id":"https://openalex.org/keywords/inference","display_name":"Inference","score":0.426496297121048},{"id":"https://openalex.org/keywords/embedded-system","display_name":"Embedded system","score":0.3679802119731903},{"id":"https://openalex.org/keywords/operating-system","display_name":"Operating system","score":0.3196946382522583},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.27086806297302246},{"id":"https://openalex.org/keywords/computer-network","display_name":"Computer network","score":0.17971602082252502},{"id":"https://openalex.org/keywords/world-wide-web","display_name":"World Wide Web","score":0.10172957181930542}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8355069160461426},{"id":"https://openalex.org/C79974875","wikidata":"https://www.wikidata.org/wiki/Q483639","display_name":"Cloud computing","level":2,"score":0.7772054672241211},{"id":"https://openalex.org/C105339364","wikidata":"https://www.wikidata.org/wiki/Q2297740","display_name":"Software deployment","level":2,"score":0.5622739195823669},{"id":"https://openalex.org/C120314980","wikidata":"https://www.wikidata.org/wiki/Q180634","display_name":"Distributed computing","level":1,"score":0.556521475315094},{"id":"https://openalex.org/C93996380","wikidata":"https://www.wikidata.org/wiki/Q44127","display_name":"Server","level":2,"score":0.5260170102119446},{"id":"https://openalex.org/C183003079","wikidata":"https://www.wikidata.org/wiki/Q1000371","display_name":"Personalization","level":2,"score":0.5194467902183533},{"id":"https://openalex.org/C206345919","wikidata":"https://www.wikidata.org/wiki/Q20380951","display_name":"Resource (disambiguation)","level":2,"score":0.45290619134902954},{"id":"https://openalex.org/C29202148","wikidata":"https://www.wikidata.org/wiki/Q287260","display_name":"Resource allocation","level":2,"score":0.4522358775138855},{"id":"https://openalex.org/C136197465","wikidata":"https://www.wikidata.org/wiki/Q1729295","display_name":"Variety (cybernetics)","level":2,"score":0.4422450363636017},{"id":"https://openalex.org/C2776214188","wikidata":"https://www.wikidata.org/wiki/Q408386","display_name":"Inference","level":2,"score":0.426496297121048},{"id":"https://openalex.org/C149635348","wikidata":"https://www.wikidata.org/wiki/Q193040","display_name":"Embedded system","level":1,"score":0.3679802119731903},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.3196946382522583},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.27086806297302246},{"id":"https://openalex.org/C31258907","wikidata":"https://www.wikidata.org/wiki/Q1301371","display_name":"Computer network","level":1,"score":0.17971602082252502},{"id":"https://openalex.org/C136764020","wikidata":"https://www.wikidata.org/wiki/Q466","display_name":"World Wide Web","level":1,"score":0.10172957181930542}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3555041.3589720","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3555041.3589720","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Companion of the 2023 International Conference on Management of Data","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":3,"referenced_works":["https://openalex.org/W2294710185","https://openalex.org/W2770885069","https://openalex.org/W4285212262"],"related_works":["https://openalex.org/W2770234245","https://openalex.org/W96612179","https://openalex.org/W4229499248","https://openalex.org/W2566006169","https://openalex.org/W1567818861","https://openalex.org/W2987774938","https://openalex.org/W4256492088","https://openalex.org/W632915154","https://openalex.org/W2055733372","https://openalex.org/W3022067003"],"abstract_inverted_index":{"Deep":[0],"learning":[1],"models,":[2,76],"particularly":[3],"pre-trained":[4],"language":[5],"models":[6,23,55,87,223],"(PLMs),":[7],"have":[8,174],"become":[9],"increasingly":[10],"important":[11],"for":[12,48,104,155,247,257],"a":[13,44,64,96,167,175,242],"variety":[14],"of":[15,53,98,122,181,188,197],"applications":[16],"that":[17,220],"require":[18,28],"text/language":[19],"processing.":[20],"However,":[21],"these":[22,86],"are":[24,164,171],"resource-intensive":[25],"and":[26,51,77,101,117,133,144,170,190,236,244,251],"often":[27],"costly":[29],"hardware":[30,111,157],"such":[31,54],"as":[32],"dedicated":[33,156],"GPU":[34],"servers.":[35],"In":[36,160,215],"response":[37],"to":[38,62,81,139,173,193,205,249],"this":[39],"issue,":[40],"we":[41,94,218],"present":[42,95],"SMILE,":[43],"novel":[45],"prototype":[46],"system":[47,120,240],"efficient":[49,179],"deployment":[50],"management":[52],"in":[56,149,209,254],"the":[57,150,153,185,199,210,233,255],"cloud.":[58],"Our":[59,119],"goal":[60],"is":[61],"build":[63],"cloud":[65,151,211,256],"platform":[66],"from":[67,166],"which":[68,136],"tenants":[69,138,204,248],"can":[70,224],"easily":[71,140],"derive":[72],"their":[73,146,207,258],"own":[74],"custom":[75],"rent":[78],"PLM":[79,148],"processors":[80,163],"run":[82],"inference":[83,234],"services":[84],"on":[85,227],"at":[88,212],"reduced":[89],"costs.":[90,214],"To":[91],"facilitate":[92],"this,":[93],"co-design":[97],"cost-effective":[99,245],"storage":[100,129],"computation":[102],"scheme":[103],"managing":[105],"massive":[106],"customized":[107,147,259],"PLMs":[108,189,208,253],"with":[109],"constrained":[110],"resources":[112,182],"via":[113],"effective":[114],"resource":[115],"sharing":[116],"multiplexing.":[118],"consists":[121],"four":[123],"core":[124],"components:":[125],"vPLM":[126,128,131,134,162],"creator,":[127],"appliance,":[130],"trainer,":[132],"processor,":[135],"allow":[137],"create,":[141],"store,":[142],"train,":[143],"use":[145],"without":[152,231],"need":[154],"or":[158],"maintenance.":[159],"particular,":[161],"virtualized":[165],"physical":[168],"machine,":[169],"designed":[172],"multi-tenant":[176],"nature,":[177],"enabling":[178],"utilization":[180],"by":[183],"precomputing":[184],"intermediate":[186],"representation":[187],"using":[191],"adapters":[192],"provide":[194],"customization":[195],"instead":[196],"training":[198],"entire":[200],"model.":[201],"This":[202],"allows":[203],"host":[206,250],"minor":[213],"our":[216,239],"demonstration,":[217],"show":[219],"over":[221],"10,000":[222],"be":[225],"hosted":[226],"one":[228],"single":[229],"machine":[230],"compromising":[232],"speed":[235],"accuracy.":[237],"Overall,":[238],"provides":[241],"convenient":[243],"solution":[246],"manage":[252],"tasks.":[260]},"counts_by_year":[{"year":2025,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
