{"id":"https://openalex.org/W4210877935","doi":"https://doi.org/10.1145/3492323.3495594","title":"Scheduling ML training on unreliable spot instances","display_name":"Scheduling ML training on unreliable spot instances","publication_year":2021,"publication_date":"2021-12-06","ids":{"openalex":"https://openalex.org/W4210877935","doi":"https://doi.org/10.1145/3492323.3495594"},"language":"en","primary_location":{"id":"doi:10.1145/3492323.3495594","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3492323.3495594","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 14th IEEE/ACM International Conference on Utility and Cloud Computing Companion","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5087891218","display_name":"Sheng Yang","orcid":"https://orcid.org/0000-0002-0643-0445"},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Sheng Yang","raw_affiliation_strings":["Northwestern University"],"affiliations":[{"raw_affiliation_string":"Northwestern University","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5042716179","display_name":"Samir Khuller","orcid":"https://orcid.org/0000-0002-5408-8023"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Samir Khuller","raw_affiliation_strings":["Northwestern University"],"affiliations":[{"raw_affiliation_string":"Northwestern University","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5064726544","display_name":"Sunav Choudhary","orcid":"https://orcid.org/0000-0002-7711-487X"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Sunav Choudhary","raw_affiliation_strings":["Adobe Research, India"],"affiliations":[{"raw_affiliation_string":"Adobe Research, India","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5023708568","display_name":"Subrata Mitra","orcid":"https://orcid.org/0000-0003-1391-0102"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Subrata Mitra","raw_affiliation_strings":["Adobe Research, India"],"affiliations":[{"raw_affiliation_string":"Adobe Research, India","institution_ids":[]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5073589054","display_name":"Kanak Mahadik","orcid":"https://orcid.org/0000-0002-6780-4199"},"institutions":[{"id":"https://openalex.org/I1306409833","display_name":"Adobe Systems (United States)","ror":"https://ror.org/059tvcg64","country_code":"US","type":"company","lineage":["https://openalex.org/I1306409833"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Kanak Mahadik","raw_affiliation_strings":["Adobe Research"],"affiliations":[{"raw_affiliation_string":"Adobe Research","institution_ids":["https://openalex.org/I1306409833"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5087891218"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.4079,"has_fulltext":false,"cited_by_count":6,"citation_normalized_percentile":{"value":0.7032017,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":97},"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"8"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11612","display_name":"Stochastic Gradient Optimization Techniques","score":0.9990000128746033,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11612","display_name":"Stochastic Gradient Optimization Techniques","score":0.9990000128746033,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10101","display_name":"Cloud Computing and Resource Management","score":0.9959999918937683,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10764","display_name":"Privacy-Preserving Technologies in Data","score":0.9926000237464905,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8139921426773071},{"id":"https://openalex.org/keywords/cloud-computing","display_name":"Cloud computing","score":0.6112214922904968},{"id":"https://openalex.org/keywords/scheduling","display_name":"Scheduling (production processes)","score":0.5519695281982422},{"id":"https://openalex.org/keywords/rounding","display_name":"Rounding","score":0.5410065054893494},{"id":"https://openalex.org/keywords/sweet-spot","display_name":"Sweet spot","score":0.47840842604637146},{"id":"https://openalex.org/keywords/renting","display_name":"Renting","score":0.4596218168735504},{"id":"https://openalex.org/keywords/partition","display_name":"Partition (number theory)","score":0.4303521513938904},{"id":"https://openalex.org/keywords/heuristic","display_name":"Heuristic","score":0.4301445484161377},{"id":"https://openalex.org/keywords/mathematical-optimization","display_name":"Mathematical optimization","score":0.4177817404270172},{"id":"https://openalex.org/keywords/operations-research","display_name":"Operations research","score":0.4112618863582611},{"id":"https://openalex.org/keywords/distributed-computing","display_name":"Distributed computing","score":0.35040730237960815},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.2962188720703125},{"id":"https://openalex.org/keywords/simulation","display_name":"Simulation","score":0.15471604466438293},{"id":"https://openalex.org/keywords/operating-system","display_name":"Operating system","score":0.12213900685310364}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8139921426773071},{"id":"https://openalex.org/C79974875","wikidata":"https://www.wikidata.org/wiki/Q483639","display_name":"Cloud computing","level":2,"score":0.6112214922904968},{"id":"https://openalex.org/C206729178","wikidata":"https://www.wikidata.org/wiki/Q2271896","display_name":"Scheduling (production processes)","level":2,"score":0.5519695281982422},{"id":"https://openalex.org/C136625980","wikidata":"https://www.wikidata.org/wiki/Q663208","display_name":"Rounding","level":2,"score":0.5410065054893494},{"id":"https://openalex.org/C2993112377","wikidata":"https://www.wikidata.org/wiki/Q1206825","display_name":"Sweet spot","level":3,"score":0.47840842604637146},{"id":"https://openalex.org/C85502023","wikidata":"https://www.wikidata.org/wiki/Q157171","display_name":"Renting","level":2,"score":0.4596218168735504},{"id":"https://openalex.org/C42812","wikidata":"https://www.wikidata.org/wiki/Q1082910","display_name":"Partition (number theory)","level":2,"score":0.4303521513938904},{"id":"https://openalex.org/C173801870","wikidata":"https://www.wikidata.org/wiki/Q201413","display_name":"Heuristic","level":2,"score":0.4301445484161377},{"id":"https://openalex.org/C126255220","wikidata":"https://www.wikidata.org/wiki/Q141495","display_name":"Mathematical optimization","level":1,"score":0.4177817404270172},{"id":"https://openalex.org/C42475967","wikidata":"https://www.wikidata.org/wiki/Q194292","display_name":"Operations research","level":1,"score":0.4112618863582611},{"id":"https://openalex.org/C120314980","wikidata":"https://www.wikidata.org/wiki/Q180634","display_name":"Distributed computing","level":1,"score":0.35040730237960815},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.2962188720703125},{"id":"https://openalex.org/C44154836","wikidata":"https://www.wikidata.org/wiki/Q45045","display_name":"Simulation","level":1,"score":0.15471604466438293},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.12213900685310364},{"id":"https://openalex.org/C17744445","wikidata":"https://www.wikidata.org/wiki/Q36442","display_name":"Political science","level":0,"score":0.0},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.0},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.0},{"id":"https://openalex.org/C199539241","wikidata":"https://www.wikidata.org/wiki/Q7748","display_name":"Law","level":1,"score":0.0},{"id":"https://openalex.org/C114614502","wikidata":"https://www.wikidata.org/wiki/Q76592","display_name":"Combinatorics","level":1,"score":0.0},{"id":"https://openalex.org/C3019090810","wikidata":"https://www.wikidata.org/wiki/Q192431","display_name":"Speed skating","level":2,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3492323.3495594","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3492323.3495594","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 14th IEEE/ACM International Conference on Utility and Cloud Computing Companion","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"display_name":"Industry, innovation and infrastructure","id":"https://metadata.un.org/sdg/9","score":0.5}],"awards":[],"funders":[{"id":"https://openalex.org/F4320307786","display_name":"Adobe Systems","ror":"https://ror.org/059tvcg64"},{"id":"https://openalex.org/F4320309475","display_name":"Northwestern University","ror":"https://ror.org/000e0be47"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":7,"referenced_works":["https://openalex.org/W2006618115","https://openalex.org/W2029177847","https://openalex.org/W2605643718","https://openalex.org/W2610956211","https://openalex.org/W2798515322","https://openalex.org/W3047261146","https://openalex.org/W6600281463"],"related_works":["https://openalex.org/W1563872154","https://openalex.org/W4220780102","https://openalex.org/W2410881844","https://openalex.org/W3196334750","https://openalex.org/W1502401885","https://openalex.org/W2004257129","https://openalex.org/W2116281088","https://openalex.org/W2357551824","https://openalex.org/W2016668641","https://openalex.org/W2019368960"],"abstract_inverted_index":{"Cloud":[0],"providers":[1,30],"rent":[2],"out":[3],"surplus":[4],"computational":[5,114],"resources":[6,115],"as":[7],"spot":[8,17,34,47,83],"instances":[9,18,35,48],"at":[10],"a":[11,37,63,107,144],"deep":[12],"discount.":[13],"However,":[14,53],"these":[15,33,172],"cheap":[16],"are":[19,62,175],"revocable.":[20],"When":[21],"demand":[22],"surges":[23],"for":[24,49,96,101],"higher":[25],"priced":[26],"on-demand":[27,191],"instances,":[28,84],"cloud":[29,98],"can":[31],"interrupt":[32],"after":[36],"brief":[38],"alert.":[39],"Such":[40,106],"unreliability":[41],"makes":[42],"it":[43],"challenging":[44],"to":[45,66,111,117,177,184,201,211],"utilize":[46],"many":[50],"long-running":[51],"jobs.":[52,132],"with":[54,182,190],"checkpoints":[55],"and":[56,139,170,174],"restoration,":[57],"machine-learning":[58],"(ML)":[59],"training":[60,104],"jobs":[61,80],"good":[64],"candidate":[65],"overcome":[67],"this":[68,71],"difficulty.":[69],"In":[70],"paper,":[72],"we":[73,157,197],"formalize":[74],"the":[75,113,135,154,179,187,194,202],"problem":[76],"of":[77,124,186],"scheduling":[78],"ML-training":[79],"on":[81,153],"transient":[82],"especially":[85],"from":[86],"an":[87,160],"ML":[88,103],"researcher's":[89],"view,":[90],"who":[91],"may":[92],"have":[93],"some":[94,129],"grant/credit":[95],"renting":[97],"computing":[99],"services":[100],"several":[102],"tasks.":[105],"researcher":[108],"would":[109],"need":[110],"partition":[112],"wisely":[116],"maximize":[118],"outcome":[119],"(or":[120],"total":[121,195],"expected":[122],"utility":[123,181,196],"all":[125],"jobs)":[126],"while":[127],"maintaining":[128],"fairness":[130],"between":[131,137],"We":[133,168],"investigate":[134],"trade-off":[136],"low-cost/interruptible":[138],"high-cost/uninterruptible":[140],"computation,":[141],"by":[142],"proposing":[143],"linear-programming":[145],"(LP)":[146],"rounding":[147],"based":[148],"polynomial":[149],"time":[150],"algorithm.":[151],"Based":[152],"LP":[155],"solution,":[156],"also":[158],"give":[159],"LP-based":[161],"heuristic":[162],"that":[163],"performs":[164],"well":[165],"in":[166],"practice.":[167],"implement":[169],"evaluate":[171],"algorithms,":[173],"able":[176],"achieve":[178],"same":[180],"23%":[183],"48%":[185],"budget":[188],"needed":[189],"instances.":[192],"Moreover,":[193],"get":[198],"is":[199],"close":[200,210],"theoretical":[203],"upper":[204],"bound":[205],"under":[206],"various":[207],"settings,":[208],"indicating":[209],"optimal":[212],"performance.":[213]},"counts_by_year":[{"year":2025,"cited_by_count":3},{"year":2024,"cited_by_count":1},{"year":2023,"cited_by_count":2}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
