{"id":"https://openalex.org/W4416798106","doi":"https://doi.org/10.1109/tpds.2025.3638428","title":"Accelerating ML Inference via Opportunistic Pre-Loading on Serverless Clusters","display_name":"Accelerating ML Inference via Opportunistic Pre-Loading on Serverless Clusters","publication_year":2025,"publication_date":"2025-11-28","ids":{"openalex":"https://openalex.org/W4416798106","doi":"https://doi.org/10.1109/tpds.2025.3638428"},"language":null,"primary_location":{"id":"doi:10.1109/tpds.2025.3638428","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tpds.2025.3638428","pdf_url":null,"source":{"id":"https://openalex.org/S97130795","display_name":"IEEE Transactions on Parallel and Distributed Systems","issn_l":"1045-9219","issn":["1045-9219","1558-2183","2161-9883"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Parallel and Distributed Systems","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5048347888","display_name":"Yifan Sui","orcid":"https://orcid.org/0009-0005-2261-5772"},"institutions":[{"id":"https://openalex.org/I183067930","display_name":"Shanghai Jiao Tong University","ror":"https://ror.org/0220qvk04","country_code":"CN","type":"education","lineage":["https://openalex.org/I183067930"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yifan Sui","raw_affiliation_strings":["Department of Automation of the School of Electronic Information and Electrical Engineering, Shanghai Jiao Tong University, Shanghai, China","Departments of Automation of the School of Electronic Information and Electrical Engineering at, Shanghai Jiao Tong University, Shanghai, China"],"raw_orcid":"https://orcid.org/0009-0005-2261-5772","affiliations":[{"raw_affiliation_string":"Department of Automation of the School of Electronic Information and Electrical Engineering, Shanghai Jiao Tong University, Shanghai, China","institution_ids":["https://openalex.org/I183067930"]},{"raw_affiliation_string":"Departments of Automation of the School of Electronic Information and Electrical Engineering at, Shanghai Jiao Tong University, Shanghai, China","institution_ids":["https://openalex.org/I183067930"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5075787386","display_name":"Hanfei Yu","orcid":"https://orcid.org/0000-0001-5790-4981"},"institutions":[{"id":"https://openalex.org/I108468826","display_name":"Stevens Institute of Technology","ror":"https://ror.org/02z43xh36","country_code":"US","type":"education","lineage":["https://openalex.org/I108468826"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Hanfei Yu","raw_affiliation_strings":["Department of Electrical and Computer Engineering, Stevens Institute of Technology, Hoboken, NJ, USA","Department of Electrical and Computer Engineering Stevens Institute of Technology, Hoboken, NJ, USA"],"raw_orcid":"https://orcid.org/0000-0001-5790-4981","affiliations":[{"raw_affiliation_string":"Department of Electrical and Computer Engineering, Stevens Institute of Technology, Hoboken, NJ, USA","institution_ids":["https://openalex.org/I108468826"]},{"raw_affiliation_string":"Department of Electrical and Computer Engineering Stevens Institute of Technology, Hoboken, NJ, USA","institution_ids":["https://openalex.org/I108468826"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Yitao Hu","orcid":"https://orcid.org/0009-0004-0458-0900"},"institutions":[{"id":"https://openalex.org/I162868743","display_name":"Tianjin University","ror":"https://ror.org/012tb2g32","country_code":"CN","type":"education","lineage":["https://openalex.org/I162868743"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yitao Hu","raw_affiliation_strings":["Tianjin Key Laboratory of Advanced Networking of the Department of Intelligence and Computing, Tianjin University, Tianjin, China","Tianjin Key Laboratory of Advanced Network- ing of the Department of Intelligence and Computing at, Tianjin University, Tianjin, China"],"raw_orcid":"https://orcid.org/0009-0004-0458-0900","affiliations":[{"raw_affiliation_string":"Tianjin Key Laboratory of Advanced Networking of the Department of Intelligence and Computing, Tianjin University, Tianjin, China","institution_ids":["https://openalex.org/I162868743"]},{"raw_affiliation_string":"Tianjin Key Laboratory of Advanced Network- ing of the Department of Intelligence and Computing at, Tianjin University, Tianjin, China","institution_ids":["https://openalex.org/I162868743"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101967607","display_name":"Jianxun Li","orcid":"https://orcid.org/0000-0003-4205-8561"},"institutions":[{"id":"https://openalex.org/I183067930","display_name":"Shanghai Jiao Tong University","ror":"https://ror.org/0220qvk04","country_code":"CN","type":"education","lineage":["https://openalex.org/I183067930"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jianxun Li","raw_affiliation_strings":["Department of Automation of the School of Electronic Information and Electrical Engineering, Shanghai Jiao Tong University, Shanghai, China","Departments of Automation of the School of Electronic Information and Electrical Engineering at, Shanghai Jiao Tong University, Shanghai, China"],"raw_orcid":"https://orcid.org/0000-0003-4205-8561","affiliations":[{"raw_affiliation_string":"Department of Automation of the School of Electronic Information and Electrical Engineering, Shanghai Jiao Tong University, Shanghai, China","institution_ids":["https://openalex.org/I183067930"]},{"raw_affiliation_string":"Departments of Automation of the School of Electronic Information and Electrical Engineering at, Shanghai Jiao Tong University, Shanghai, China","institution_ids":["https://openalex.org/I183067930"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5100769481","display_name":"Hao Wang","orcid":"https://orcid.org/0000-0002-1444-2657"},"institutions":[{"id":"https://openalex.org/I108468826","display_name":"Stevens Institute of Technology","ror":"https://ror.org/02z43xh36","country_code":"US","type":"education","lineage":["https://openalex.org/I108468826"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Hao Wang","raw_affiliation_strings":["Department of Electrical and Computer Engineering, Stevens Institute of Technology, Hoboken, NJ, USA","Department of Electrical and Computer Engineering Stevens Institute of Technology, Hoboken, NJ, USA"],"raw_orcid":"https://orcid.org/0000-0002-1444-2657","affiliations":[{"raw_affiliation_string":"Department of Electrical and Computer Engineering, Stevens Institute of Technology, Hoboken, NJ, USA","institution_ids":["https://openalex.org/I108468826"]},{"raw_affiliation_string":"Department of Electrical and Computer Engineering Stevens Institute of Technology, Hoboken, NJ, USA","institution_ids":["https://openalex.org/I108468826"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":5,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.46063705,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":"37","issue":"2","first_page":"472","last_page":"488"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10101","display_name":"Cloud Computing and Resource Management","score":0.6538000106811523,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10101","display_name":"Cloud Computing and Resource Management","score":0.6538000106811523,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10273","display_name":"IoT and Edge/Fog Computing","score":0.23980000615119934,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T14347","display_name":"Big Data and Digital Economy","score":0.025299999862909317,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/inference","display_name":"Inference","score":0.7537999749183655},{"id":"https://openalex.org/keywords/latency","display_name":"Latency (audio)","score":0.6388000249862671},{"id":"https://openalex.org/keywords/speedup","display_name":"Speedup","score":0.5965999960899353},{"id":"https://openalex.org/keywords/cloud-computing","display_name":"Cloud computing","score":0.5907999873161316},{"id":"https://openalex.org/keywords/scheduling","display_name":"Scheduling (production processes)","score":0.588699996471405},{"id":"https://openalex.org/keywords/agile-software-development","display_name":"Agile software development","score":0.5218999981880188},{"id":"https://openalex.org/keywords/load-balancing","display_name":"Load balancing (electrical power)","score":0.4611000120639801},{"id":"https://openalex.org/keywords/dram","display_name":"Dram","score":0.3285999894142151},{"id":"https://openalex.org/keywords/execution-time","display_name":"Execution time","score":0.3273000121116638}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8959000110626221},{"id":"https://openalex.org/C2776214188","wikidata":"https://www.wikidata.org/wiki/Q408386","display_name":"Inference","level":2,"score":0.7537999749183655},{"id":"https://openalex.org/C82876162","wikidata":"https://www.wikidata.org/wiki/Q17096504","display_name":"Latency (audio)","level":2,"score":0.6388000249862671},{"id":"https://openalex.org/C68339613","wikidata":"https://www.wikidata.org/wiki/Q1549489","display_name":"Speedup","level":2,"score":0.5965999960899353},{"id":"https://openalex.org/C79974875","wikidata":"https://www.wikidata.org/wiki/Q483639","display_name":"Cloud computing","level":2,"score":0.5907999873161316},{"id":"https://openalex.org/C206729178","wikidata":"https://www.wikidata.org/wiki/Q2271896","display_name":"Scheduling (production processes)","level":2,"score":0.588699996471405},{"id":"https://openalex.org/C120314980","wikidata":"https://www.wikidata.org/wiki/Q180634","display_name":"Distributed computing","level":1,"score":0.5422999858856201},{"id":"https://openalex.org/C14185376","wikidata":"https://www.wikidata.org/wiki/Q30232","display_name":"Agile software development","level":2,"score":0.5218999981880188},{"id":"https://openalex.org/C138959212","wikidata":"https://www.wikidata.org/wiki/Q1806783","display_name":"Load balancing (electrical power)","level":3,"score":0.4611000120639801},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.34290000796318054},{"id":"https://openalex.org/C7366592","wikidata":"https://www.wikidata.org/wiki/Q1255620","display_name":"Dram","level":2,"score":0.3285999894142151},{"id":"https://openalex.org/C2989134064","wikidata":"https://www.wikidata.org/wiki/Q288510","display_name":"Execution time","level":2,"score":0.3273000121116638},{"id":"https://openalex.org/C16320812","wikidata":"https://www.wikidata.org/wiki/Q1812200","display_name":"Idle","level":2,"score":0.32409998774528503},{"id":"https://openalex.org/C19012869","wikidata":"https://www.wikidata.org/wiki/Q578372","display_name":"Response time","level":2,"score":0.3224000036716461},{"id":"https://openalex.org/C2984822820","wikidata":"https://www.wikidata.org/wiki/Q1123036","display_name":"Processor scheduling","level":3,"score":0.31439998745918274},{"id":"https://openalex.org/C117896860","wikidata":"https://www.wikidata.org/wiki/Q11376","display_name":"Acceleration","level":2,"score":0.30550000071525574},{"id":"https://openalex.org/C55416958","wikidata":"https://www.wikidata.org/wiki/Q6206757","display_name":"Job shop scheduling","level":3,"score":0.29980000853538513},{"id":"https://openalex.org/C93996380","wikidata":"https://www.wikidata.org/wiki/Q44127","display_name":"Server","level":2,"score":0.29919999837875366},{"id":"https://openalex.org/C46637626","wikidata":"https://www.wikidata.org/wiki/Q6693015","display_name":"Low latency (capital markets)","level":2,"score":0.29589998722076416},{"id":"https://openalex.org/C2778476105","wikidata":"https://www.wikidata.org/wiki/Q628539","display_name":"Workload","level":2,"score":0.29280000925064087},{"id":"https://openalex.org/C79403827","wikidata":"https://www.wikidata.org/wiki/Q3988","display_name":"Real-time computing","level":1,"score":0.29190000891685486},{"id":"https://openalex.org/C149635348","wikidata":"https://www.wikidata.org/wiki/Q193040","display_name":"Embedded system","level":1,"score":0.287200003862381},{"id":"https://openalex.org/C2779370713","wikidata":"https://www.wikidata.org/wiki/Q357554","display_name":"Load management","level":2,"score":0.2824999988079071},{"id":"https://openalex.org/C206345919","wikidata":"https://www.wikidata.org/wiki/Q20380951","display_name":"Resource (disambiguation)","level":2,"score":0.26809999346733093},{"id":"https://openalex.org/C2780513914","wikidata":"https://www.wikidata.org/wiki/Q18210350","display_name":"Bottleneck","level":2,"score":0.26440000534057617},{"id":"https://openalex.org/C106516650","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm design","level":2,"score":0.26179999113082886},{"id":"https://openalex.org/C2994168587","wikidata":"https://www.wikidata.org/wiki/Q5295","display_name":"Random access memory","level":2,"score":0.2565999925136566},{"id":"https://openalex.org/C2778915421","wikidata":"https://www.wikidata.org/wiki/Q3643177","display_name":"Performance improvement","level":2,"score":0.25360000133514404},{"id":"https://openalex.org/C3017489831","wikidata":"https://www.wikidata.org/wiki/Q2393193","display_name":"Running time","level":2,"score":0.25110000371932983}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/tpds.2025.3638428","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tpds.2025.3638428","pdf_url":null,"source":{"id":"https://openalex.org/S97130795","display_name":"IEEE Transactions on Parallel and Distributed Systems","issn_l":"1045-9219","issn":["1045-9219","1558-2183","2161-9883"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Parallel and Distributed Systems","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G6368830644","display_name":null,"funder_award_id":"61673265","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":44,"referenced_works":["https://openalex.org/W1975442866","https://openalex.org/W2183341477","https://openalex.org/W2194775991","https://openalex.org/W2618530766","https://openalex.org/W2901693060","https://openalex.org/W2932772649","https://openalex.org/W2963974647","https://openalex.org/W2993353322","https://openalex.org/W3012028616","https://openalex.org/W3021115254","https://openalex.org/W3096533337","https://openalex.org/W3097213700","https://openalex.org/W3112094022","https://openalex.org/W3112236759","https://openalex.org/W3118820805","https://openalex.org/W3125961627","https://openalex.org/W3130689885","https://openalex.org/W3155705005","https://openalex.org/W3159219445","https://openalex.org/W3159401322","https://openalex.org/W3203063759","https://openalex.org/W3204075005","https://openalex.org/W3205898353","https://openalex.org/W3207498965","https://openalex.org/W3209190856","https://openalex.org/W3209259988","https://openalex.org/W4214610523","https://openalex.org/W4214690606","https://openalex.org/W4220796798","https://openalex.org/W4220848902","https://openalex.org/W4224313989","https://openalex.org/W4246193833","https://openalex.org/W4283219705","https://openalex.org/W4294904165","https://openalex.org/W4312060067","https://openalex.org/W4312781266","https://openalex.org/W4321636696","https://openalex.org/W4360831839","https://openalex.org/W4385623190","https://openalex.org/W4388041447","https://openalex.org/W4394585838","https://openalex.org/W4394871711","https://openalex.org/W4394923129","https://openalex.org/W4404385262"],"related_works":[],"abstract_inverted_index":{"Serverless":[0],"computing":[1],"has":[2,69],"emerged":[3],"as":[4],"a":[5,66],"novel":[6],"paradigm":[7],"in":[8,39,65,100,166],"cloud":[9],"computing,":[10],"characterized":[11],"by":[12,90],"its":[13],"agile":[14],"scalability,":[15],"cost-effective":[16],"pay-as-you-go":[17],"billing,":[18],"and":[19,36,76,110,153,169,175,183,199,217,240],"user-friendly":[20],"capabilities":[21],"for":[22,127,192],"Machine":[23],"Learning":[24],"(ML)":[25],"inference":[26,102,152],"tasks.":[27],"Developers":[28],"wrap":[29],"their":[30],"ML":[31,94,101,148],"algorithms":[32],"into":[33],"serverless":[34,194,255],"functions":[35],"run":[37],"them":[38],"containers.":[40,119],"However,":[41],"the":[42,49,57,86,91,104,114,143,163,253],"well-known":[43],"cold-start":[44],"problem":[45],"significantly":[46,112],"slows":[47],"down":[48],"response":[50],"time":[51,105,115],"of":[52,59,93],"functions.":[53],"To":[54],"address":[55,85],"cold-starts,":[56],"technique":[58],"pre-warming,":[60],"which":[61],"proactively":[62],"maintains":[63],"containers":[64,168],"warm":[67,118],"state,":[68],"gained":[70],"widespread":[71],"adoption":[72],"across":[73],"both":[74],"research":[75],"industry.":[77],"Nevertheless,":[78],"we":[79],"observed":[80],"that":[81,230],"pre-warming":[82,124,221,249],"does":[83],"not":[84],"distinct":[87],"delays":[88],"caused":[89],"loading":[92,147,238],"artifacts.":[95],"According":[96],"to":[97,107,117,141,171,204,212,215,236,243,247,264],"our":[98],"analysis,":[99],"functions,":[103],"required":[106,173],"load":[108,202],"libraries":[109,174],"models":[111],"exceeds":[113],"needed":[116],"Thus,":[120],"relying":[121],"solely":[122],"on":[123,224],"is":[125,190],"insufficient":[126],"mitigating":[128],"cold-starts.":[129],"This":[130],"paper":[131],"presents":[132],"<italic":[133,158,187,209,231,258],"xmlns:mml=\"http://www.w3.org/1998/Math/MathML\"":[134,159,188,210,232,259],"xmlns:xlink=\"http://www.w3.org/1999/xlink\">Tyche</i>,":[135],"an":[136,178],"opportunistic":[137],"pre-loading":[138,256],"approach":[139],"designed":[140],"eliminate":[142],"latency":[144,239],"associated":[145],"with":[146,219,226,252],"artifacts,":[149],"enabling":[150],"near-instant":[151],"minimizing":[154],"function":[155],"execution":[156],"time.":[157],"xmlns:xlink=\"http://www.w3.org/1999/xlink\">Tyche</i>":[160,189,211,233,260],"fully":[161],"leverages":[162],"idle":[164],"memory":[165],"warmed":[167],"GPUs":[170],"pre-load":[172],"models,":[176],"striking":[177],"optimal":[179],"balance":[180],"between":[181],"acceleration":[182],"resource":[184],"efficiency.":[185],"Additionally,":[186],"tailored":[191],"large-scale":[193],"platforms,":[195],"incorporating":[196],"cluster-wide":[197],"scheduling":[198],"lightweight":[200],"locality-aware":[201],"balancing":[203],"enhance":[205],"performance.":[206],"We":[207],"design":[208],"be":[213],"transparent":[214],"providers":[216],"compatible":[218],"existing":[220],"solutions.":[222,250],"Experiments":[223],"OpenWhisk":[225],"real-world":[227],"workloads":[228],"show":[229],"reduces":[234],"up":[235,242,263],"93%":[237],"achieves":[241,262],"8\u00d7":[244],"speedup":[245],"compared":[246],"state-of-the-art":[248,254],"Compared":[251],"solution,":[257],"also":[261],"1.9\u00d7":[265],"speedup.":[266]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2025-11-28T00:00:00"}
