{"id":"https://openalex.org/W4394998727","doi":"https://doi.org/10.1145/3620665.3640411","title":"SpotServe: Serving Generative Large Language Models on Preemptible Instances","display_name":"SpotServe: Serving Generative Large Language Models on Preemptible Instances","publication_year":2024,"publication_date":"2024-04-22","ids":{"openalex":"https://openalex.org/W4394998727","doi":"https://doi.org/10.1145/3620665.3640411"},"language":"en","primary_location":{"id":"doi:10.1145/3620665.3640411","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3620665.3640411","pdf_url":null,"source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 29th ACM International Conference on Architectural Support for Programming Languages and Operating Systems, Volume 2","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://doi.org/10.1145/3620665.3640411","any_repository_has_fulltext":null},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5015552951","display_name":"Xupeng Miao","orcid":"https://orcid.org/0000-0002-9371-8358"},"institutions":[{"id":"https://openalex.org/I74973139","display_name":"Carnegie Mellon University","ror":"https://ror.org/05x2bcf33","country_code":"US","type":"education","lineage":["https://openalex.org/I74973139"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Xupeng Miao","raw_affiliation_strings":["Carnegie Mellon University, Pittsburgh, PA, USA"],"affiliations":[{"raw_affiliation_string":"Carnegie Mellon University, Pittsburgh, PA, USA","institution_ids":["https://openalex.org/I74973139"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5010483833","display_name":"Chunan Shi","orcid":"https://orcid.org/0009-0009-7197-4965"},"institutions":[{"id":"https://openalex.org/I20231570","display_name":"Peking University","ror":"https://ror.org/02v51f717","country_code":"CN","type":"education","lineage":["https://openalex.org/I20231570"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Chunan Shi","raw_affiliation_strings":["Peking University, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Peking University, Beijing, China","institution_ids":["https://openalex.org/I20231570"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5076729908","display_name":"Jiangfei Duan","orcid":"https://orcid.org/0000-0002-6327-2033"},"institutions":[{"id":"https://openalex.org/I177725633","display_name":"Chinese University of Hong Kong","ror":"https://ror.org/00t33hh48","country_code":"CN","type":"education","lineage":["https://openalex.org/I177725633"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jiangfei Duan","raw_affiliation_strings":["The Chinese University of Hong Kong, Hong Kong, China"],"affiliations":[{"raw_affiliation_string":"The Chinese University of Hong Kong, Hong Kong, China","institution_ids":["https://openalex.org/I177725633"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5093367649","display_name":"Xiaoli Xi","orcid":"https://orcid.org/0009-0005-3098-2417"},"institutions":[{"id":"https://openalex.org/I74973139","display_name":"Carnegie Mellon University","ror":"https://ror.org/05x2bcf33","country_code":"US","type":"education","lineage":["https://openalex.org/I74973139"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Xiaoli Xi","raw_affiliation_strings":["Carnegie Mellon University, Pittsburgh, USA"],"affiliations":[{"raw_affiliation_string":"Carnegie Mellon University, Pittsburgh, USA","institution_ids":["https://openalex.org/I74973139"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5010087030","display_name":"Dahua Lin","orcid":"https://orcid.org/0000-0002-8865-7896"},"institutions":[{"id":"https://openalex.org/I177725633","display_name":"Chinese University of Hong Kong","ror":"https://ror.org/00t33hh48","country_code":"CN","type":"education","lineage":["https://openalex.org/I177725633"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Dahua Lin","raw_affiliation_strings":["The Chinese University of Hong Kong &amp; Sensetime Research, Hong Kong, China"],"affiliations":[{"raw_affiliation_string":"The Chinese University of Hong Kong &amp; Sensetime Research, Hong Kong, China","institution_ids":["https://openalex.org/I177725633"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5062357883","display_name":"Bin Cui","orcid":"https://orcid.org/0000-0003-1681-4677"},"institutions":[{"id":"https://openalex.org/I20231570","display_name":"Peking University","ror":"https://ror.org/02v51f717","country_code":"CN","type":"education","lineage":["https://openalex.org/I20231570"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Bin Cui","raw_affiliation_strings":["Peking University, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Peking University, Beijing, China","institution_ids":["https://openalex.org/I20231570"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5062437461","display_name":"Zhihao Jia","orcid":"https://orcid.org/0000-0002-1270-5185"},"institutions":[{"id":"https://openalex.org/I74973139","display_name":"Carnegie Mellon University","ror":"https://ror.org/05x2bcf33","country_code":"US","type":"education","lineage":["https://openalex.org/I74973139"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Zhihao Jia","raw_affiliation_strings":["Carnegie Mellon University, Pittsburgh, United States of America"],"affiliations":[{"raw_affiliation_string":"Carnegie Mellon University, Pittsburgh, United States of America","institution_ids":["https://openalex.org/I74973139"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":7,"corresponding_author_ids":["https://openalex.org/A5015552951"],"corresponding_institution_ids":["https://openalex.org/I74973139"],"apc_list":null,"apc_paid":null,"fwci":15.5934,"has_fulltext":false,"cited_by_count":47,"citation_normalized_percentile":{"value":0.99245079,"is_in_top_1_percent":true,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":96,"max":100},"biblio":{"volume":null,"issue":null,"first_page":"1112","last_page":"1127"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9746000170707703,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9746000170707703,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11307","display_name":"Domain Adaptation and Few-Shot Learning","score":0.902899980545044,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.75605708360672},{"id":"https://openalex.org/keywords/generative-grammar","display_name":"Generative grammar","score":0.7044133543968201},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.47052252292633057},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.43033507466316223},{"id":"https://openalex.org/keywords/language-model","display_name":"Language model","score":0.4206390976905823}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.75605708360672},{"id":"https://openalex.org/C39890363","wikidata":"https://www.wikidata.org/wiki/Q36108","display_name":"Generative grammar","level":2,"score":0.7044133543968201},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.47052252292633057},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.43033507466316223},{"id":"https://openalex.org/C137293760","wikidata":"https://www.wikidata.org/wiki/Q3621696","display_name":"Language model","level":2,"score":0.4206390976905823}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3620665.3640411","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3620665.3640411","pdf_url":null,"source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 29th ACM International Conference on Architectural Support for Programming Languages and Operating Systems, Volume 2","raw_type":"proceedings-article"}],"best_oa_location":{"id":"doi:10.1145/3620665.3640411","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3620665.3640411","pdf_url":null,"source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 29th ACM International Conference on Architectural Support for Programming Languages and Operating Systems, Volume 2","raw_type":"proceedings-article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":39,"referenced_works":["https://openalex.org/W95608104","https://openalex.org/W2565600385","https://openalex.org/W2734941459","https://openalex.org/W2884700152","https://openalex.org/W2901299405","https://openalex.org/W2956461999","https://openalex.org/W2969388332","https://openalex.org/W2973727699","https://openalex.org/W2981449041","https://openalex.org/W2982157693","https://openalex.org/W2999905431","https://openalex.org/W3010543330","https://openalex.org/W3030163527","https://openalex.org/W3033527233","https://openalex.org/W3036879053","https://openalex.org/W3040573126","https://openalex.org/W3041710494","https://openalex.org/W3130689885","https://openalex.org/W3130716829","https://openalex.org/W3144382362","https://openalex.org/W3177263144","https://openalex.org/W3203426023","https://openalex.org/W3214762859","https://openalex.org/W4220741164","https://openalex.org/W4225004481","https://openalex.org/W4226479682","https://openalex.org/W4292779060","https://openalex.org/W4294904165","https://openalex.org/W4310282800","https://openalex.org/W4321636575","https://openalex.org/W4321853806","https://openalex.org/W4327930473","https://openalex.org/W4364382874","https://openalex.org/W4383749415","https://openalex.org/W4387321091","https://openalex.org/W6713134421","https://openalex.org/W6739901393","https://openalex.org/W6798686915","https://openalex.org/W7000926438"],"related_works":["https://openalex.org/W4391375266","https://openalex.org/W2748952813","https://openalex.org/W2380075625","https://openalex.org/W2390279801","https://openalex.org/W2358668433","https://openalex.org/W2376932109","https://openalex.org/W2001405890","https://openalex.org/W2382290278","https://openalex.org/W4391913857","https://openalex.org/W3204019825"],"abstract_inverted_index":{"The":[0],"high":[1],"computational":[2],"and":[3,77],"memory":[4],"requirements":[5],"of":[6,80],"generative":[7],"large":[8],"language":[9],"models":[10],"(LLMs)":[11],"make":[12],"it":[13],"challenging":[14],"to":[15,22,41,83],"serve":[16],"them":[17],"cheaply.":[18],"This":[19],"paper":[20],"aims":[21],"reduce":[23],"the":[24,58,78,85],"monetary":[25],"cost":[26],"for":[27],"serving":[28],"LLMs":[29,65],"by":[30,57,73],"leveraging":[31],"preemptible":[32,67],"GPU":[33,43],"instances":[34,52,68,82],"on":[35,66],"modern":[36],"clouds,":[37],"which":[38],"offer":[39],"accesses":[40],"spare":[42],"resources":[44],"at":[45,61],"a":[46],"much":[47],"cheaper":[48],"price":[49],"than":[50],"regular":[51],"but":[53],"may":[54],"be":[55],"preempted":[56],"cloud":[59],"provider":[60],"any":[62],"time.":[63],"Serving":[64],"requires":[69],"addressing":[70],"challenges":[71],"induced":[72],"frequent":[74],"instance":[75],"preemptions":[76],"necessity":[79],"migrating":[81],"handle":[84],"preemptions.":[86]},"counts_by_year":[{"year":2026,"cited_by_count":4},{"year":2025,"cited_by_count":32},{"year":2024,"cited_by_count":8},{"year":2023,"cited_by_count":3}],"updated_date":"2026-03-20T23:20:44.827607","created_date":"2025-10-10T00:00:00"}
