{"id":"https://openalex.org/W4205500752","doi":"https://doi.org/10.1109/tcc.2022.3143153","title":"Online Scheduling Algorithm for Heterogeneous Distributed Machine Learning Jobs","display_name":"Online Scheduling Algorithm for Heterogeneous Distributed Machine Learning Jobs","publication_year":2022,"publication_date":"2022-01-14","ids":{"openalex":"https://openalex.org/W4205500752","doi":"https://doi.org/10.1109/tcc.2022.3143153"},"language":"en","primary_location":{"id":"doi:10.1109/tcc.2022.3143153","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tcc.2022.3143153","pdf_url":null,"source":{"id":"https://openalex.org/S2492498579","display_name":"IEEE Transactions on Cloud Computing","issn_l":"2168-7161","issn":["2168-7161","2372-0018"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Cloud Computing","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5024396134","display_name":"Ruiting Zhou","orcid":"https://orcid.org/0000-0001-9681-6482"},"institutions":[{"id":"https://openalex.org/I177725633","display_name":"Chinese University of Hong Kong","ror":"https://ror.org/00t33hh48","country_code":"HK","type":"education","lineage":["https://openalex.org/I177725633"]},{"id":"https://openalex.org/I37461747","display_name":"Wuhan University","ror":"https://ror.org/033vjfk17","country_code":"CN","type":"education","lineage":["https://openalex.org/I37461747"]}],"countries":["CN","HK"],"is_corresponding":true,"raw_author_name":"Ruiting Zhou","raw_affiliation_strings":["Key Laboratory of Aerospace Information Security and Trusted Computing, Ministry of Education, School of Cyber Science and Engineering, Wuhan University, Wuhan, Hubei, China","Department of Computer Science and Engineering, The Chinese University of Hong Kong, Hong Kong"],"affiliations":[{"raw_affiliation_string":"Key Laboratory of Aerospace Information Security and Trusted Computing, Ministry of Education, School of Cyber Science and Engineering, Wuhan University, Wuhan, Hubei, China","institution_ids":["https://openalex.org/I37461747"]},{"raw_affiliation_string":"Department of Computer Science and Engineering, The Chinese University of Hong Kong, Hong Kong","institution_ids":["https://openalex.org/I177725633"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5043162940","display_name":"Jinlong Pang","orcid":"https://orcid.org/0000-0001-6425-9669"},"institutions":[{"id":"https://openalex.org/I37461747","display_name":"Wuhan University","ror":"https://ror.org/033vjfk17","country_code":"CN","type":"education","lineage":["https://openalex.org/I37461747"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jinlong Pang","raw_affiliation_strings":["Key Laboratory of Aerospace Information Security and Trusted Computing, Ministry of Education, School of Cyber Science and Engineering, Wuhan University, Wuhan, Hubei, China"],"affiliations":[{"raw_affiliation_string":"Key Laboratory of Aerospace Information Security and Trusted Computing, Ministry of Education, School of Cyber Science and Engineering, Wuhan University, Wuhan, Hubei, China","institution_ids":["https://openalex.org/I37461747"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100418232","display_name":"Qin Zhang","orcid":"https://orcid.org/0000-0003-0173-0470"},"institutions":[{"id":"https://openalex.org/I37461747","display_name":"Wuhan University","ror":"https://ror.org/033vjfk17","country_code":"CN","type":"education","lineage":["https://openalex.org/I37461747"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Qin Zhang","raw_affiliation_strings":["School of Computer Science, Wuhan University, Hubei, Wuhan, China"],"affiliations":[{"raw_affiliation_string":"School of Computer Science, Wuhan University, Hubei, Wuhan, China","institution_ids":["https://openalex.org/I37461747"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5012597518","display_name":"Chuan Wu","orcid":"https://orcid.org/0000-0002-3144-4398"},"institutions":[{"id":"https://openalex.org/I889458895","display_name":"University of Hong Kong","ror":"https://ror.org/02zhqgq86","country_code":"HK","type":"education","lineage":["https://openalex.org/I889458895"]}],"countries":["HK"],"is_corresponding":false,"raw_author_name":"Chuan Wu","raw_affiliation_strings":["University of Hong Kong, Kowloon, Hong Kong"],"affiliations":[{"raw_affiliation_string":"University of Hong Kong, Kowloon, Hong Kong","institution_ids":["https://openalex.org/I889458895"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5053369746","display_name":"Lei Jiao","orcid":"https://orcid.org/0000-0002-3964-3172"},"institutions":[{"id":"https://openalex.org/I181233156","display_name":"University of Oregon","ror":"https://ror.org/0293rh119","country_code":"US","type":"education","lineage":["https://openalex.org/I181233156"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Lei Jiao","raw_affiliation_strings":["University of Oregon, Eugene, OR, USA"],"affiliations":[{"raw_affiliation_string":"University of Oregon, Eugene, OR, USA","institution_ids":["https://openalex.org/I181233156"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5066672810","display_name":"Yi Zhong","orcid":"https://orcid.org/0000-0002-0626-7510"},"institutions":[{"id":"https://openalex.org/I37461747","display_name":"Wuhan University","ror":"https://ror.org/033vjfk17","country_code":"CN","type":"education","lineage":["https://openalex.org/I37461747"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yi Zhong","raw_affiliation_strings":["Key Laboratory of Aerospace Information Security and Trusted Computing, Ministry of Education, School of Cyber Science and Engineering, Wuhan University, Wuhan, Hubei, China"],"affiliations":[{"raw_affiliation_string":"Key Laboratory of Aerospace Information Security and Trusted Computing, Ministry of Education, School of Cyber Science and Engineering, Wuhan University, Wuhan, Hubei, China","institution_ids":["https://openalex.org/I37461747"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5066247159","display_name":"Zongpeng Li","orcid":"https://orcid.org/0000-0001-5351-2075"},"institutions":[{"id":"https://openalex.org/I37461747","display_name":"Wuhan University","ror":"https://ror.org/033vjfk17","country_code":"CN","type":"education","lineage":["https://openalex.org/I37461747"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Zongpeng Li","raw_affiliation_strings":["School of Computer Science, Wuhan University, Hubei, Wuhan, China"],"affiliations":[{"raw_affiliation_string":"School of Computer Science, Wuhan University, Hubei, Wuhan, China","institution_ids":["https://openalex.org/I37461747"]}]}],"institutions":[],"countries_distinct_count":3,"institutions_distinct_count":7,"corresponding_author_ids":["https://openalex.org/A5024396134"],"corresponding_institution_ids":["https://openalex.org/I177725633","https://openalex.org/I37461747"],"apc_list":null,"apc_paid":null,"fwci":7.3327,"has_fulltext":false,"cited_by_count":25,"citation_normalized_percentile":{"value":0.97132708,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":94,"max":99},"biblio":{"volume":"11","issue":"2","first_page":"1514","last_page":"1529"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10101","display_name":"Cloud Computing and Resource Management","score":0.9995999932289124,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10101","display_name":"Cloud Computing and Resource Management","score":0.9995999932289124,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11612","display_name":"Stochastic Gradient Optimization Techniques","score":0.9972000122070312,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10273","display_name":"IoT and Edge/Fog Computing","score":0.9962999820709229,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7683504819869995},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.6304649710655212},{"id":"https://openalex.org/keywords/job-shop-scheduling","display_name":"Job shop scheduling","score":0.5720645189285278},{"id":"https://openalex.org/keywords/scheduling","display_name":"Scheduling (production processes)","score":0.5465221405029297},{"id":"https://openalex.org/keywords/parameterized-complexity","display_name":"Parameterized complexity","score":0.5122515559196472},{"id":"https://openalex.org/keywords/online-algorithm","display_name":"Online algorithm","score":0.4575651288032532},{"id":"https://openalex.org/keywords/cloud-computing","display_name":"Cloud computing","score":0.4488412141799927},{"id":"https://openalex.org/keywords/partition","display_name":"Partition (number theory)","score":0.4118794798851013},{"id":"https://openalex.org/keywords/job-scheduler","display_name":"Job scheduler","score":0.4111824631690979},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.3982848525047302},{"id":"https://openalex.org/keywords/distributed-computing","display_name":"Distributed computing","score":0.3851417899131775},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.3820658028125763},{"id":"https://openalex.org/keywords/schedule","display_name":"Schedule","score":0.36134010553359985},{"id":"https://openalex.org/keywords/mathematical-optimization","display_name":"Mathematical optimization","score":0.21018359065055847},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.15411821007728577},{"id":"https://openalex.org/keywords/operating-system","display_name":"Operating system","score":0.14940598607063293}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7683504819869995},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.6304649710655212},{"id":"https://openalex.org/C55416958","wikidata":"https://www.wikidata.org/wiki/Q6206757","display_name":"Job shop scheduling","level":3,"score":0.5720645189285278},{"id":"https://openalex.org/C206729178","wikidata":"https://www.wikidata.org/wiki/Q2271896","display_name":"Scheduling (production processes)","level":2,"score":0.5465221405029297},{"id":"https://openalex.org/C165464430","wikidata":"https://www.wikidata.org/wiki/Q1570441","display_name":"Parameterized complexity","level":2,"score":0.5122515559196472},{"id":"https://openalex.org/C196921405","wikidata":"https://www.wikidata.org/wiki/Q786431","display_name":"Online algorithm","level":2,"score":0.4575651288032532},{"id":"https://openalex.org/C79974875","wikidata":"https://www.wikidata.org/wiki/Q483639","display_name":"Cloud computing","level":2,"score":0.4488412141799927},{"id":"https://openalex.org/C42812","wikidata":"https://www.wikidata.org/wiki/Q1082910","display_name":"Partition (number theory)","level":2,"score":0.4118794798851013},{"id":"https://openalex.org/C111873713","wikidata":"https://www.wikidata.org/wiki/Q1641413","display_name":"Job scheduler","level":3,"score":0.4111824631690979},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.3982848525047302},{"id":"https://openalex.org/C120314980","wikidata":"https://www.wikidata.org/wiki/Q180634","display_name":"Distributed computing","level":1,"score":0.3851417899131775},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.3820658028125763},{"id":"https://openalex.org/C68387754","wikidata":"https://www.wikidata.org/wiki/Q7271585","display_name":"Schedule","level":2,"score":0.36134010553359985},{"id":"https://openalex.org/C126255220","wikidata":"https://www.wikidata.org/wiki/Q141495","display_name":"Mathematical optimization","level":1,"score":0.21018359065055847},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.15411821007728577},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.14940598607063293},{"id":"https://openalex.org/C114614502","wikidata":"https://www.wikidata.org/wiki/Q76592","display_name":"Combinatorics","level":1,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1109/tcc.2022.3143153","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tcc.2022.3143153","pdf_url":null,"source":{"id":"https://openalex.org/S2492498579","display_name":"IEEE Transactions on Cloud Computing","issn_l":"2168-7161","issn":["2168-7161","2372-0018"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Cloud Computing","raw_type":"journal-article"},{"id":"pmh:oai:hub.hku.hk:10722/318076","is_oa":false,"landing_page_url":"https://hub.hku.hk/handle/10722/318076","pdf_url":null,"source":{"id":"https://openalex.org/S4377196271","display_name":"The HKU Scholars Hub (University of Hong Kong)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I889458895","host_organization_name":"University of Hong Kong","host_organization_lineage":["https://openalex.org/I889458895"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Article"}],"best_oa_location":null,"sustainable_development_goals":[{"score":0.44999998807907104,"id":"https://metadata.un.org/sdg/8","display_name":"Decent work and economic growth"}],"awards":[{"id":"https://openalex.org/G2124776390","display_name":null,"funder_award_id":"CNS-2047719","funder_id":"https://openalex.org/F4320335353","funder_display_name":"National Science Foundation of Sri Lanka"},{"id":"https://openalex.org/G3762208358","display_name":null,"funder_award_id":"62072344","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G8024709401","display_name":null,"funder_award_id":"U20A20177","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"},{"id":"https://openalex.org/F4320335353","display_name":"National Science Foundation of Sri Lanka","ror":"https://ror.org/010xaa060"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":53,"referenced_works":["https://openalex.org/W1442374986","https://openalex.org/W1890643295","https://openalex.org/W2016601686","https://openalex.org/W2018162709","https://openalex.org/W2025616337","https://openalex.org/W2060393849","https://openalex.org/W2092461546","https://openalex.org/W2105947650","https://openalex.org/W2125053293","https://openalex.org/W2141992894","https://openalex.org/W2148204686","https://openalex.org/W2163961697","https://openalex.org/W2172331085","https://openalex.org/W2186615578","https://openalex.org/W2189465200","https://openalex.org/W2527855297","https://openalex.org/W2607684270","https://openalex.org/W2622751957","https://openalex.org/W2789335636","https://openalex.org/W2798515322","https://openalex.org/W2804682296","https://openalex.org/W2896633576","https://openalex.org/W2901460396","https://openalex.org/W2912109029","https://openalex.org/W2919594608","https://openalex.org/W2919897868","https://openalex.org/W2920397365","https://openalex.org/W2921179295","https://openalex.org/W2953384591","https://openalex.org/W2962684017","https://openalex.org/W2962758826","https://openalex.org/W2962911728","https://openalex.org/W2963403751","https://openalex.org/W2964321035","https://openalex.org/W2972874238","https://openalex.org/W3092390737","https://openalex.org/W3096583839","https://openalex.org/W3096956001","https://openalex.org/W4288289123","https://openalex.org/W6628377381","https://openalex.org/W6639249596","https://openalex.org/W6684084819","https://openalex.org/W6686509673","https://openalex.org/W6687322159","https://openalex.org/W6713134421","https://openalex.org/W6739693220","https://openalex.org/W6751627690","https://openalex.org/W6758283263","https://openalex.org/W6759814162","https://openalex.org/W6767869616","https://openalex.org/W6769424276","https://openalex.org/W6782839094","https://openalex.org/W6784871562"],"related_works":["https://openalex.org/W1908901187","https://openalex.org/W2139982839","https://openalex.org/W1984267569","https://openalex.org/W2092966558","https://openalex.org/W1956651153","https://openalex.org/W2347561926","https://openalex.org/W2131569046","https://openalex.org/W2327547880","https://openalex.org/W4390203503","https://openalex.org/W2045215405"],"abstract_inverted_index":{"Distributed":[0],"machine":[1],"learning":[2],"(ML)":[3],"has":[4],"played":[5],"a":[6,37,67,136,162,167,192],"key":[7],"role":[8],"in":[9,34,66,184,212],"today's":[10,213],"proliferation":[11],"of":[12,18,78,92,123,138,149,181],"AI":[13,214],"services.":[14],"A":[15,63],"typical":[16],"model":[17,32],"distributed":[19,68],"ML":[20,47,69,79,158,174],"is":[21,71,94,101],"to":[22,30,73,112,176],"partition":[23],"training":[24,48,159],"datasets":[25],"over":[26],"multiple":[27],"worker":[28],"nodes":[29],"update":[31],"parameters":[33],"parallel,":[35],"adopting":[36],"<italic":[38,43],"xmlns:mml=\"http://www.w3.org/1998/Math/MathML\"":[39,44],"xmlns:xlink=\"http://www.w3.org/1999/xlink\">parameter":[40],"server</i>":[41],"or":[42],"xmlns:xlink=\"http://www.w3.org/1999/xlink\">AllReduce</i>":[45],"architecture.":[46],"jobs":[49,80,160,183],"are":[50],"typically":[51],"resource":[52,61,86],"elastic,":[53],"completed":[54],"using":[55,203],"various":[56],"time":[57,100,116,199],"lengths":[58],"with":[59,84,135,197],"different":[60,85],"configurations.":[62],"fundamental":[64],"problem":[65],"cluster":[70],"how":[72],"explore":[74],"the":[75,90,114,118,121,140,178,185],"demand":[76],"elasticity":[77],"and":[81,96,120,126,165],"schedule":[82],"them":[83],"configurations,":[87],"such":[88],"that":[89,155,171,207],"utilization":[91],"resources":[93],"maximized":[95],"average":[97,142],"job":[98,131,175],"completion":[99,143],"minimized.":[102],"To":[103],"address":[104],"it,":[105],"we":[106],"propose":[107],"an":[108,151],"online":[109,146,152,189],"scheduling":[110,153,169],"algorithm":[111,147,170,190],"decide":[113],"execution":[115],"window,":[117],"number":[119],"type":[122],"concurrent":[124],"workers":[125],"parameter":[127],"servers":[128],"for":[129],"each":[130,173],"upon":[132],"its":[133],"arrival,":[134],"goal":[137],"minimizing":[139],"weighted":[141],"time.":[144],"Our":[145,188],"consists":[148],"(i)":[150],"framework":[154],"groups":[156],"unprocessed":[157],"into":[161],"batch":[163,168],"iteratively,":[164],"(ii)":[166],"configures":[172],"maximize":[177],"total":[179],"weight":[180],"scheduled":[182],"current":[186],"iteration.":[187],"guarantees":[191],"good":[193],"parameterized":[194],"competitive":[195],"ratio":[196],"polynomial":[198],"complexity.":[200],"Extensive":[201],"evaluations":[202],"real-world":[204],"data":[205],"demonstrate":[206],"it":[208],"outperforms":[209],"state-of-the-art":[210],"schedulers":[211],"cloud":[215],"systems.":[216]},"counts_by_year":[{"year":2026,"cited_by_count":2},{"year":2025,"cited_by_count":5},{"year":2024,"cited_by_count":10},{"year":2023,"cited_by_count":6},{"year":2022,"cited_by_count":2}],"updated_date":"2026-04-09T08:11:56.329763","created_date":"2025-10-10T00:00:00"}
