{"id":"https://openalex.org/W4387609097","doi":"https://doi.org/10.1109/tnet.2023.3321967","title":"Automating Cloud Deployment for Real-Time Online Foundation Model Inference","display_name":"Automating Cloud Deployment for Real-Time Online Foundation Model Inference","publication_year":2023,"publication_date":"2023-10-13","ids":{"openalex":"https://openalex.org/W4387609097","doi":"https://doi.org/10.1109/tnet.2023.3321967"},"language":"en","primary_location":{"id":"doi:10.1109/tnet.2023.3321967","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tnet.2023.3321967","pdf_url":null,"source":{"id":"https://openalex.org/S62238642","display_name":"IEEE/ACM Transactions on Networking","issn_l":"1063-6692","issn":["1063-6692","1558-2566"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE/ACM Transactions on Networking","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5077532419","display_name":"Yang Li","orcid":"https://orcid.org/0000-0003-3180-3511"},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Yang Li","raw_affiliation_strings":["School of Software, Tsinghua University, Beijing, China"],"affiliations":[{"raw_affiliation_string":"School of Software, Tsinghua University, Beijing, China","institution_ids":["https://openalex.org/I99065089"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100357922","display_name":"Zhenhua Li","orcid":"https://orcid.org/0000-0001-7286-122X"},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Zhenhua Li","raw_affiliation_strings":["School of Software, Tsinghua University, Beijing, China"],"affiliations":[{"raw_affiliation_string":"School of Software, Tsinghua University, Beijing, China","institution_ids":["https://openalex.org/I99065089"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5049750274","display_name":"Zhenhua Han","orcid":"https://orcid.org/0000-0002-2880-7100"},"institutions":[{"id":"https://openalex.org/I4210113369","display_name":"Microsoft Research Asia (China)","ror":"https://ror.org/0300m5276","country_code":"CN","type":"company","lineage":["https://openalex.org/I1290206253","https://openalex.org/I4210113369"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Zhenhua Han","raw_affiliation_strings":["Microsoft Research Asia, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Microsoft Research Asia, Beijing, China","institution_ids":["https://openalex.org/I4210113369"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5085281913","display_name":"Quanlu Zhang","orcid":"https://orcid.org/0000-0003-0557-1104"},"institutions":[{"id":"https://openalex.org/I4210113369","display_name":"Microsoft Research Asia (China)","ror":"https://ror.org/0300m5276","country_code":"CN","type":"company","lineage":["https://openalex.org/I1290206253","https://openalex.org/I4210113369"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Quanlu Zhang","raw_affiliation_strings":["Microsoft Research Asia, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Microsoft Research Asia, Beijing, China","institution_ids":["https://openalex.org/I4210113369"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5003951925","display_name":"Xiaobo Ma","orcid":"https://orcid.org/0000-0002-0934-5035"},"institutions":[{"id":"https://openalex.org/I87445476","display_name":"Xi'an Jiaotong University","ror":"https://ror.org/017zhmm22","country_code":"CN","type":"education","lineage":["https://openalex.org/I87445476"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xiaobo Ma","raw_affiliation_strings":["School of Computer Science and Technology, Xi&#x2019;an Jiaotong University, Xi&#x2019;an, China"],"affiliations":[{"raw_affiliation_string":"School of Computer Science and Technology, Xi&#x2019;an Jiaotong University, Xi&#x2019;an, China","institution_ids":["https://openalex.org/I87445476"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5077532419"],"corresponding_institution_ids":["https://openalex.org/I99065089"],"apc_list":null,"apc_paid":null,"fwci":0.6959,"has_fulltext":false,"cited_by_count":4,"citation_normalized_percentile":{"value":0.76310351,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":99},"biblio":{"volume":"32","issue":"2","first_page":"1509","last_page":"1523"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11512","display_name":"Anomaly Detection Techniques and Applications","score":0.9940000176429749,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11512","display_name":"Anomaly Detection Techniques and Applications","score":0.9940000176429749,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12761","display_name":"Data Stream Mining Techniques","score":0.9918000102043152,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12535","display_name":"Machine Learning and Data Classification","score":0.991599977016449,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/software-deployment","display_name":"Software deployment","score":0.8530364036560059},{"id":"https://openalex.org/keywords/cloud-computing","display_name":"Cloud computing","score":0.8108477592468262},{"id":"https://openalex.org/keywords/foundation","display_name":"Foundation (evidence)","score":0.6600452661514282},{"id":"https://openalex.org/keywords/inference","display_name":"Inference","score":0.6185005307197571},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6119007468223572},{"id":"https://openalex.org/keywords/software-engineering","display_name":"Software engineering","score":0.24372589588165283},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.24035292863845825},{"id":"https://openalex.org/keywords/operating-system","display_name":"Operating system","score":0.19408798217773438},{"id":"https://openalex.org/keywords/archaeology","display_name":"Archaeology","score":0.06280988454818726},{"id":"https://openalex.org/keywords/history","display_name":"History","score":0.05952504277229309}],"concepts":[{"id":"https://openalex.org/C105339364","wikidata":"https://www.wikidata.org/wiki/Q2297740","display_name":"Software deployment","level":2,"score":0.8530364036560059},{"id":"https://openalex.org/C79974875","wikidata":"https://www.wikidata.org/wiki/Q483639","display_name":"Cloud computing","level":2,"score":0.8108477592468262},{"id":"https://openalex.org/C2780966255","wikidata":"https://www.wikidata.org/wiki/Q5474306","display_name":"Foundation (evidence)","level":2,"score":0.6600452661514282},{"id":"https://openalex.org/C2776214188","wikidata":"https://www.wikidata.org/wiki/Q408386","display_name":"Inference","level":2,"score":0.6185005307197571},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6119007468223572},{"id":"https://openalex.org/C115903868","wikidata":"https://www.wikidata.org/wiki/Q80993","display_name":"Software engineering","level":1,"score":0.24372589588165283},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.24035292863845825},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.19408798217773438},{"id":"https://openalex.org/C166957645","wikidata":"https://www.wikidata.org/wiki/Q23498","display_name":"Archaeology","level":1,"score":0.06280988454818726},{"id":"https://openalex.org/C95457728","wikidata":"https://www.wikidata.org/wiki/Q309","display_name":"History","level":0,"score":0.05952504277229309}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/tnet.2023.3321967","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tnet.2023.3321967","pdf_url":null,"source":{"id":"https://openalex.org/S62238642","display_name":"IEEE/ACM Transactions on Networking","issn_l":"1063-6692","issn":["1063-6692","1558-2566"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE/ACM Transactions on Networking","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G2586125334","display_name":null,"funder_award_id":"61972313","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G5089311613","display_name":null,"funder_award_id":"2022M721831","funder_id":"https://openalex.org/F4320321543","funder_display_name":"China Postdoctoral Science Foundation"},{"id":"https://openalex.org/G5461702903","display_name":null,"funder_award_id":"61902211","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G7296112749","display_name":null,"funder_award_id":"100336949","funder_id":"https://openalex.org/F4320307764","funder_display_name":"Microsoft"},{"id":"https://openalex.org/G7391993428","display_name":null,"funder_award_id":"62202266","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G8037786011","display_name":null,"funder_award_id":"2023-JC-JQ-50","funder_id":"https://openalex.org/F4320336567","funder_display_name":"Natural Science Basic Research Program of Shaanxi Province"}],"funders":[{"id":"https://openalex.org/F4320307764","display_name":"Microsoft","ror":"https://ror.org/00d0nc645"},{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"},{"id":"https://openalex.org/F4320321543","display_name":"China Postdoctoral Science Foundation","ror":"https://ror.org/0426zh255"},{"id":"https://openalex.org/F4320336567","display_name":"Natural Science Basic Research Program of Shaanxi Province","ror":null}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":75,"referenced_works":["https://openalex.org/W1480909796","https://openalex.org/W1510052597","https://openalex.org/W1522301498","https://openalex.org/W1686810756","https://openalex.org/W1922655562","https://openalex.org/W1967288931","https://openalex.org/W2012975714","https://openalex.org/W2024060531","https://openalex.org/W2064675550","https://openalex.org/W2107667898","https://openalex.org/W2108598243","https://openalex.org/W2119717200","https://openalex.org/W2125531986","https://openalex.org/W2128193809","https://openalex.org/W2133564696","https://openalex.org/W2157331557","https://openalex.org/W2160815625","https://openalex.org/W2161455936","https://openalex.org/W2168505588","https://openalex.org/W2183341477","https://openalex.org/W2189149359","https://openalex.org/W2194775991","https://openalex.org/W2327501763","https://openalex.org/W2402144811","https://openalex.org/W2525778437","https://openalex.org/W2549139847","https://openalex.org/W2581790986","https://openalex.org/W2614121823","https://openalex.org/W2617411258","https://openalex.org/W2761251889","https://openalex.org/W2767236912","https://openalex.org/W2772948367","https://openalex.org/W2779866762","https://openalex.org/W2798291715","https://openalex.org/W2896457183","https://openalex.org/W2944935235","https://openalex.org/W2952332632","https://openalex.org/W2978633783","https://openalex.org/W2981758446","https://openalex.org/W3013082411","https://openalex.org/W3047371394","https://openalex.org/W3099464315","https://openalex.org/W3108109508","https://openalex.org/W3189927210","https://openalex.org/W3195577433","https://openalex.org/W3203889165","https://openalex.org/W4226479682","https://openalex.org/W4236269389","https://openalex.org/W4238076109","https://openalex.org/W4249545506","https://openalex.org/W4312191382","https://openalex.org/W4385245566","https://openalex.org/W4387212540","https://openalex.org/W6608206471","https://openalex.org/W6622556047","https://openalex.org/W6631190155","https://openalex.org/W6637373629","https://openalex.org/W6640090968","https://openalex.org/W6675365184","https://openalex.org/W6678911119","https://openalex.org/W6679434410","https://openalex.org/W6679436768","https://openalex.org/W6684191040","https://openalex.org/W6719982585","https://openalex.org/W6727690538","https://openalex.org/W6730742100","https://openalex.org/W6735916004","https://openalex.org/W6738144653","https://openalex.org/W6747245092","https://openalex.org/W6751627690","https://openalex.org/W6756009870","https://openalex.org/W6768695126","https://openalex.org/W6779965347","https://openalex.org/W6800751262","https://openalex.org/W6811928498"],"related_works":["https://openalex.org/W4391375266","https://openalex.org/W2899084033","https://openalex.org/W2748952813","https://openalex.org/W2770234245","https://openalex.org/W96612179","https://openalex.org/W4229499248","https://openalex.org/W2566006169","https://openalex.org/W1567818861","https://openalex.org/W2987774938","https://openalex.org/W4256492088"],"abstract_inverted_index":{"Deep":[0,184],"neural":[1],"network":[2],"(DNN)":[3],"foundation":[4,159,254],"models":[5,252],"are":[6,24],"currently":[7],"exhibiting":[8],"high":[9],"prediction":[10],"accuracy":[11],"and":[12,37,43,47,126,183,195,202,217,237,241,253,266,270],"strong":[13],"adaptability":[14],"to":[15,87,117,151,187,264],"broad":[16],"tasks":[17],"with":[18,162,198,268,277],"remarkably":[19],"large":[20],"model":[21,86,91,160],"scales.":[22],"They":[23],"increasingly":[25],"becoming":[26],"the":[27,82,85,96,118,154,166,180,190,205,208,212,239],"backend":[28],"support":[29],"of":[30,63,69,84,123,144,168,207,234,243],"DNN-driven":[31],"real-time":[32,158],"online":[33],"services,":[34],"e.g.,":[35],"Siri":[36],"Instagram.":[38],"Such":[39],"services":[40,57],"require":[41],"low-latency":[42],"cost-efficiency":[44,206],"for":[45,59,93,157,249],"quality-of-service":[46],"commercial":[48],"competitiveness.":[49],"When":[50],"deployed":[51],"in":[52],"a":[53,75,131,134,223],"cloud":[54,64,124,155,193],"environment,":[55],"these":[56],"call":[58],"an":[60],"appropriate":[61],"selection":[62],"configurations":[65,125],"(i.e.,":[66],"specific":[67],"types":[68],"VM":[70],"instances),":[71],"as":[72,74],"well":[73],"considerate":[76],"device":[77,127,196],"placement":[78,128,197],"plan":[79],"that":[80,248],"places":[81],"operations":[83],"multiple":[88],"GPUs":[89],"via":[90],"parallelism":[92],"cost-efficiency.":[94],"Currently,":[95],"deployment":[97,136,156,209],"mainly":[98],"relies":[99],"on":[100,211,227,232],"service":[101],"providers\u2019":[102],"manual":[103],"efforts,":[104],"which":[105],"is":[106,174],"not":[107],"only":[108],"onerous":[109],"but":[110],"also":[111],"far":[112],"from":[113],"satisfactory":[114],"oftentimes":[115],"due":[116],"huge":[119],"joint":[120],"search":[121,200,273],"space":[122],"plans":[129],"(for":[130],"same":[132],"service,":[133],"poor":[135],"can":[137],"incur":[138],"significantly":[139],"more":[140],"costs":[141,164,261],"by":[142,176,262],"tens":[143],"times).":[145],"In":[146],"this":[147],"paper,":[148],"we":[149],"attempt":[150,173],"efficiently":[152],"automate":[153],"inference":[161,260],"minimum":[163],"under":[165],"constraint":[167],"acceptably":[169],"low":[170],"latency.":[171],"This":[172],"enabled":[175],"1)":[177],"jointly":[178],"leveraging":[179],"Bayesian":[181],"Optimization":[182],"Reinforcement":[185],"Learning":[186],"adaptively":[188],"unearth":[189],"(nearly)":[191],"optimal":[192],"configuration":[194],"limited":[199],"time,":[201],"2)":[203],"enhancing":[204],"based":[210,226],"probing-informed":[213],"block":[214],"multiplexing":[215],"mechanism":[216],"Tensor":[218],"Algebra":[219],"SuperOptimizer.":[220],"We":[221],"implement":[222],"prototype":[224],"system":[225],"TensorFlow,":[228],"conduct":[229],"extensive":[230],"experiments":[231],"top":[233],"Microsoft":[235],"Azure,":[236],"demonstrate":[238],"generality":[240],"scalability":[242],"our":[244,256],"solution.":[245],"Results":[246],"show":[247],"lightweight":[250],"DNN":[251],"models,":[255],"solution":[257],"essentially":[258],"saves":[259],"up":[263],"15%":[265],"47%":[267],"57%":[269],"38%":[271],"lower":[272],"overheads":[274],"respectively,":[275],"compared":[276],"non-trivial":[278],"baselines.":[279]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":2},{"year":2024,"cited_by_count":1}],"updated_date":"2026-03-27T05:58:40.876381","created_date":"2025-10-10T00:00:00"}
