{"id":"https://openalex.org/W3091992486","doi":"https://doi.org/10.1145/3397166.3409128","title":"Online scheduling of heterogeneous distributed machine learning jobs","display_name":"Online scheduling of heterogeneous distributed machine learning jobs","publication_year":2020,"publication_date":"2020-10-08","ids":{"openalex":"https://openalex.org/W3091992486","doi":"https://doi.org/10.1145/3397166.3409128","mag":"3091992486"},"language":"en","primary_location":{"id":"doi:10.1145/3397166.3409128","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3397166.3409128","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the Twenty-First International Symposium on Theory, Algorithmic Foundations, and Protocol Design for Mobile Networks and Mobile Computing","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5100418232","display_name":"Qin Zhang","orcid":"https://orcid.org/0000-0003-0173-0470"},"institutions":[{"id":"https://openalex.org/I37461747","display_name":"Wuhan University","ror":"https://ror.org/033vjfk17","country_code":"CN","type":"education","lineage":["https://openalex.org/I37461747"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Qin Zhang","raw_affiliation_strings":["Wuhan University, China"],"affiliations":[{"raw_affiliation_string":"Wuhan University, China","institution_ids":["https://openalex.org/I37461747"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5024396134","display_name":"Ruiting Zhou","orcid":"https://orcid.org/0000-0001-9681-6482"},"institutions":[{"id":"https://openalex.org/I177725633","display_name":"Chinese University of Hong Kong","ror":"https://ror.org/00t33hh48","country_code":"CN","type":"education","lineage":["https://openalex.org/I177725633"]},{"id":"https://openalex.org/I37461747","display_name":"Wuhan University","ror":"https://ror.org/033vjfk17","country_code":"CN","type":"education","lineage":["https://openalex.org/I37461747"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Ruiting Zhou","raw_affiliation_strings":["Wuhan University, China and The Chinese University of Hong Kong, Hong Kong"],"affiliations":[{"raw_affiliation_string":"Wuhan University, China and The Chinese University of Hong Kong, Hong Kong","institution_ids":["https://openalex.org/I177725633","https://openalex.org/I37461747"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5012597518","display_name":"Chuan Wu","orcid":"https://orcid.org/0000-0002-3144-4398"},"institutions":[{"id":"https://openalex.org/I889458895","display_name":"University of Hong Kong","ror":"https://ror.org/02zhqgq86","country_code":"HK","type":"education","lineage":["https://openalex.org/I889458895"]}],"countries":["HK"],"is_corresponding":false,"raw_author_name":"Chuan Wu","raw_affiliation_strings":["The University of Hong Kong, Hong Kong"],"affiliations":[{"raw_affiliation_string":"The University of Hong Kong, Hong Kong","institution_ids":["https://openalex.org/I889458895"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5053369746","display_name":"Lei Jiao","orcid":"https://orcid.org/0000-0002-3964-3172"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Lei Jiao","raw_affiliation_strings":["University of Oregon"],"affiliations":[{"raw_affiliation_string":"University of Oregon","institution_ids":[]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5066247159","display_name":"Zongpeng Li","orcid":"https://orcid.org/0000-0001-5351-2075"},"institutions":[{"id":"https://openalex.org/I37461747","display_name":"Wuhan University","ror":"https://ror.org/033vjfk17","country_code":"CN","type":"education","lineage":["https://openalex.org/I37461747"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Zongpeng Li","raw_affiliation_strings":["Wuhan University, China"],"affiliations":[{"raw_affiliation_string":"Wuhan University, China","institution_ids":["https://openalex.org/I37461747"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5100418232"],"corresponding_institution_ids":["https://openalex.org/I37461747"],"apc_list":null,"apc_paid":null,"fwci":4.4502,"has_fulltext":false,"cited_by_count":26,"citation_normalized_percentile":{"value":0.95230428,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":95,"max":99},"biblio":{"volume":null,"issue":null,"first_page":"111","last_page":"120"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10101","display_name":"Cloud Computing and Resource Management","score":0.9991999864578247,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10101","display_name":"Cloud Computing and Resource Management","score":0.9991999864578247,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10273","display_name":"IoT and Edge/Fog Computing","score":0.9970999956130981,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11612","display_name":"Stochastic Gradient Optimization Techniques","score":0.9968000054359436,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.790805459022522},{"id":"https://openalex.org/keywords/server","display_name":"Server","score":0.5588845014572144},{"id":"https://openalex.org/keywords/distributed-computing","display_name":"Distributed computing","score":0.5549644231796265},{"id":"https://openalex.org/keywords/job-shop-scheduling","display_name":"Job shop scheduling","score":0.5375534892082214},{"id":"https://openalex.org/keywords/parameterized-complexity","display_name":"Parameterized complexity","score":0.5319054126739502},{"id":"https://openalex.org/keywords/scheduling","display_name":"Scheduling (production processes)","score":0.5307666659355164},{"id":"https://openalex.org/keywords/job-scheduler","display_name":"Job scheduler","score":0.5155460834503174},{"id":"https://openalex.org/keywords/online-algorithm","display_name":"Online algorithm","score":0.46765488386154175},{"id":"https://openalex.org/keywords/partition","display_name":"Partition (number theory)","score":0.4434368312358856},{"id":"https://openalex.org/keywords/rate-monotonic-scheduling","display_name":"Rate-monotonic scheduling","score":0.42289257049560547},{"id":"https://openalex.org/keywords/schedule","display_name":"Schedule","score":0.42192140221595764},{"id":"https://openalex.org/keywords/fair-share-scheduling","display_name":"Fair-share scheduling","score":0.3962979018688202},{"id":"https://openalex.org/keywords/cloud-computing","display_name":"Cloud computing","score":0.3550008535385132},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.3136909306049347},{"id":"https://openalex.org/keywords/mathematical-optimization","display_name":"Mathematical optimization","score":0.26427119970321655},{"id":"https://openalex.org/keywords/operating-system","display_name":"Operating system","score":0.15289485454559326},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.08992832899093628}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.790805459022522},{"id":"https://openalex.org/C93996380","wikidata":"https://www.wikidata.org/wiki/Q44127","display_name":"Server","level":2,"score":0.5588845014572144},{"id":"https://openalex.org/C120314980","wikidata":"https://www.wikidata.org/wiki/Q180634","display_name":"Distributed computing","level":1,"score":0.5549644231796265},{"id":"https://openalex.org/C55416958","wikidata":"https://www.wikidata.org/wiki/Q6206757","display_name":"Job shop scheduling","level":3,"score":0.5375534892082214},{"id":"https://openalex.org/C165464430","wikidata":"https://www.wikidata.org/wiki/Q1570441","display_name":"Parameterized complexity","level":2,"score":0.5319054126739502},{"id":"https://openalex.org/C206729178","wikidata":"https://www.wikidata.org/wiki/Q2271896","display_name":"Scheduling (production processes)","level":2,"score":0.5307666659355164},{"id":"https://openalex.org/C111873713","wikidata":"https://www.wikidata.org/wiki/Q1641413","display_name":"Job scheduler","level":3,"score":0.5155460834503174},{"id":"https://openalex.org/C196921405","wikidata":"https://www.wikidata.org/wiki/Q786431","display_name":"Online algorithm","level":2,"score":0.46765488386154175},{"id":"https://openalex.org/C42812","wikidata":"https://www.wikidata.org/wiki/Q1082910","display_name":"Partition (number theory)","level":2,"score":0.4434368312358856},{"id":"https://openalex.org/C127456818","wikidata":"https://www.wikidata.org/wiki/Q238879","display_name":"Rate-monotonic scheduling","level":4,"score":0.42289257049560547},{"id":"https://openalex.org/C68387754","wikidata":"https://www.wikidata.org/wiki/Q7271585","display_name":"Schedule","level":2,"score":0.42192140221595764},{"id":"https://openalex.org/C31689143","wikidata":"https://www.wikidata.org/wiki/Q733809","display_name":"Fair-share scheduling","level":3,"score":0.3962979018688202},{"id":"https://openalex.org/C79974875","wikidata":"https://www.wikidata.org/wiki/Q483639","display_name":"Cloud computing","level":2,"score":0.3550008535385132},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.3136909306049347},{"id":"https://openalex.org/C126255220","wikidata":"https://www.wikidata.org/wiki/Q141495","display_name":"Mathematical optimization","level":1,"score":0.26427119970321655},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.15289485454559326},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.08992832899093628},{"id":"https://openalex.org/C114614502","wikidata":"https://www.wikidata.org/wiki/Q76592","display_name":"Combinatorics","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3397166.3409128","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3397166.3409128","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the Twenty-First International Symposium on Theory, Algorithmic Foundations, and Protocol Design for Mobile Networks and Mobile Computing","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/8","score":0.5400000214576721,"display_name":"Decent work and economic growth"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":22,"referenced_works":["https://openalex.org/W2016601686","https://openalex.org/W2018162709","https://openalex.org/W2025616337","https://openalex.org/W2060393849","https://openalex.org/W2092461546","https://openalex.org/W2105947650","https://openalex.org/W2141992894","https://openalex.org/W2172331085","https://openalex.org/W2417036297","https://openalex.org/W2527855297","https://openalex.org/W2607684270","https://openalex.org/W2622751957","https://openalex.org/W2792596678","https://openalex.org/W2798515322","https://openalex.org/W2896633576","https://openalex.org/W2912109029","https://openalex.org/W2919897868","https://openalex.org/W2920397365","https://openalex.org/W2962684017","https://openalex.org/W2962911728","https://openalex.org/W2964321035","https://openalex.org/W3105569215"],"related_works":["https://openalex.org/W2188224146","https://openalex.org/W2138018478","https://openalex.org/W2386309393","https://openalex.org/W2381020481","https://openalex.org/W2151049026","https://openalex.org/W2071824071","https://openalex.org/W2014733634","https://openalex.org/W2070629337","https://openalex.org/W4320148132","https://openalex.org/W2020990393"],"abstract_inverted_index":{"Distributed":[0],"machine":[1],"learning":[2],"(ML)":[3],"has":[4],"played":[5],"a":[6,37,61,130,156,161,186],"key":[7],"role":[8],"in":[9,34,60,178,206],"today's":[10,207],"proliferation":[11],"of":[12,18,72,86,117,132,143,175],"AI":[13,208],"services.":[14],"A":[15,57],"typical":[16],"model":[17,32],"distributed":[19,62],"ML":[20,41,63,73,152,168],"is":[21,65,88,95],"to":[22,30,67,106,170],"partition":[23],"training":[24,42,153],"datasets":[25],"over":[26],"multiple":[27],"worker":[28],"nodes":[29],"update":[31],"parameters":[33],"parallel,":[35],"adopting":[36],"parameter":[38,121],"server":[39],"architecture.":[40],"jobs":[43,74,154,177],"are":[44],"typically":[45],"resource":[46,55,80],"elastic,":[47],"completed":[48],"using":[49,197],"various":[50],"time":[51,94,110,193],"lengths":[52],"with":[53,78,129,191],"different":[54,79],"configurations.":[56],"fundamental":[58],"problem":[59],"cluster":[64],"how":[66],"explore":[68],"the":[69,84,108,112,115,134,172,179],"demand":[70],"elasticity":[71],"and":[75,90,114,120,159],"schedule":[76],"them":[77],"configurations,":[81],"such":[82],"that":[83,149,165,201],"utilization":[85],"resources":[87],"maximized":[89],"average":[91,136],"job":[92,125,169],"completion":[93,137],"minimized.":[96],"To":[97],"address":[98],"it,":[99],"we":[100],"propose":[101],"an":[102,145],"online":[103,140,146,183],"scheduling":[104,147,163],"algorithm":[105,141,164,184],"decide":[107],"execution":[109],"window,":[111],"number":[113],"type":[116],"concurrent":[118],"workers":[119],"servers":[122],"for":[123],"each":[124,167],"upon":[126],"its":[127],"arrival,":[128],"goal":[131],"minimizing":[133],"weighted":[135],"time.":[138],"Our":[139,182],"consists":[142],"(i)":[144],"framework":[148],"groups":[150],"unprocessed":[151],"into":[155],"batch":[157,162],"iteratively,":[158],"(ii)":[160],"configures":[166],"maximize":[171],"total":[173],"weight":[174],"scheduled":[176],"current":[180],"iteration.":[181],"guarantees":[185],"good":[187],"parameterized":[188],"competitive":[189],"ratio":[190],"polynomial":[192],"complexity.":[194],"Extensive":[195],"evaluations":[196],"real-world":[198],"data":[199],"demonstrate":[200],"it":[202],"outperforms":[203],"state-of-the-art":[204],"schedulers":[205],"cloud":[209],"systems.":[210]},"counts_by_year":[{"year":2025,"cited_by_count":5},{"year":2024,"cited_by_count":5},{"year":2023,"cited_by_count":7},{"year":2022,"cited_by_count":6},{"year":2021,"cited_by_count":3}],"updated_date":"2026-03-20T23:20:44.827607","created_date":"2025-10-10T00:00:00"}
