{"id":"https://openalex.org/W7123514118","doi":"https://doi.org/10.1145/3772052.3772266","title":"C <scp>uckoo</scp> : Deadline-Aware Job Packing on Heterogeneous GPUs for DL Model Training","display_name":"C <scp>uckoo</scp> : Deadline-Aware Job Packing on Heterogeneous GPUs for DL Model Training","publication_year":2025,"publication_date":"2025-11-19","ids":{"openalex":"https://openalex.org/W7123514118","doi":"https://doi.org/10.1145/3772052.3772266"},"language":null,"primary_location":{"id":"doi:10.1145/3772052.3772266","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3772052.3772266","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2025 ACM Symposium on Cloud Computing","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":null,"display_name":"Yuzheng Zhang","orcid":"https://orcid.org/0009-0009-9736-214X"},"institutions":[{"id":"https://openalex.org/I82880672","display_name":"Beihang University","ror":"https://ror.org/00wk2mp56","country_code":"CN","type":"education","lineage":["https://openalex.org/I82880672"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yuzheng Zhang","raw_affiliation_strings":["Beihang University, Beijing, China"],"raw_orcid":"https://orcid.org/0009-0009-9736-214X","affiliations":[{"raw_affiliation_string":"Beihang University, Beijing, China","institution_ids":["https://openalex.org/I82880672"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5122972605","display_name":"Renyu Yang","orcid":null},"institutions":[{"id":"https://openalex.org/I82880672","display_name":"Beihang University","ror":"https://ror.org/00wk2mp56","country_code":"CN","type":"education","lineage":["https://openalex.org/I82880672"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Renyu Yang","raw_affiliation_strings":["Beihang University, Beijing, China"],"raw_orcid":"https://orcid.org/0000-0001-6334-4925","affiliations":[{"raw_affiliation_string":"Beihang University, Beijing, China","institution_ids":["https://openalex.org/I82880672"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5108149924","display_name":"Junhong Liu","orcid":"https://orcid.org/0009-0002-5332-1855"},"institutions":[{"id":"https://openalex.org/I82880672","display_name":"Beihang University","ror":"https://ror.org/00wk2mp56","country_code":"CN","type":"education","lineage":["https://openalex.org/I82880672"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Junhong Liu","raw_affiliation_strings":["Beihang University, Beijing, China"],"raw_orcid":"https://orcid.org/0009-0002-5332-1855","affiliations":[{"raw_affiliation_string":"Beihang University, Beijing, China","institution_ids":["https://openalex.org/I82880672"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5122920006","display_name":"Weihan Jiang","orcid":null},"institutions":[{"id":"https://openalex.org/I82880672","display_name":"Beihang University","ror":"https://ror.org/00wk2mp56","country_code":"CN","type":"education","lineage":["https://openalex.org/I82880672"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Weihan Jiang","raw_affiliation_strings":["Beihang University, Beijing, China"],"raw_orcid":"https://orcid.org/0009-0001-2101-1302","affiliations":[{"raw_affiliation_string":"Beihang University, Beijing, China","institution_ids":["https://openalex.org/I82880672"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5021414951","display_name":"T Ye","orcid":null},"institutions":[{"id":"https://openalex.org/I82880672","display_name":"Beihang University","ror":"https://ror.org/00wk2mp56","country_code":"CN","type":"education","lineage":["https://openalex.org/I82880672"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Tianyu Ye","raw_affiliation_strings":["Beihang University, Beijing, China"],"raw_orcid":"https://orcid.org/0009-0000-6241-8531","affiliations":[{"raw_affiliation_string":"Beihang University, Beijing, China","institution_ids":["https://openalex.org/I82880672"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5023972038","display_name":"Yiqiao Liao","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yiqiao Liao","raw_affiliation_strings":["Kuaishou Inc., Beijing, China"],"raw_orcid":"https://orcid.org/0000-0002-2635-1541","affiliations":[{"raw_affiliation_string":"Kuaishou Inc., Beijing, China","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5122920832","display_name":"Penghao Zhang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Penghao Zhang","raw_affiliation_strings":["Kuaishou Inc., Beijing, China"],"raw_orcid":"https://orcid.org/0009-0002-7958-3378","affiliations":[{"raw_affiliation_string":"Kuaishou Inc., Beijing, China","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5122940574","display_name":"Tiezi Zhang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Tiezi Zhang","raw_affiliation_strings":["Kuaishou Inc., Beijing, China"],"raw_orcid":"https://orcid.org/0009-0002-6203-5260","affiliations":[{"raw_affiliation_string":"Kuaishou Inc., Beijing, China","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5122932485","display_name":"Kun Shang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Kun Shang","raw_affiliation_strings":["Kuaishou Inc., Beijing, China"],"raw_orcid":"https://orcid.org/0009-0000-0188-8628","affiliations":[{"raw_affiliation_string":"Kuaishou Inc., Beijing, China","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5066123431","display_name":"Tianyu Wo","orcid":"https://orcid.org/0000-0002-5331-3364"},"institutions":[{"id":"https://openalex.org/I82880672","display_name":"Beihang University","ror":"https://ror.org/00wk2mp56","country_code":"CN","type":"education","lineage":["https://openalex.org/I82880672"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Tianyu Wo","raw_affiliation_strings":["Beihang University, Beijing, China"],"raw_orcid":"https://orcid.org/0000-0002-5331-3364","affiliations":[{"raw_affiliation_string":"Beihang University, Beijing, China","institution_ids":["https://openalex.org/I82880672"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5122977824","display_name":"Chunming Hu","orcid":null},"institutions":[{"id":"https://openalex.org/I82880672","display_name":"Beihang University","ror":"https://ror.org/00wk2mp56","country_code":"CN","type":"education","lineage":["https://openalex.org/I82880672"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Chunming Hu","raw_affiliation_strings":["Beihang University, Beijing, China"],"raw_orcid":"https://orcid.org/0000-0003-3473-9703","affiliations":[{"raw_affiliation_string":"Beihang University, Beijing, China","institution_ids":["https://openalex.org/I82880672"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5122942158","display_name":"Chengru Song","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Chengru Song","raw_affiliation_strings":["Kuaishou Inc., Beijing, China"],"raw_orcid":"https://orcid.org/0009-0009-3826-8436","affiliations":[{"raw_affiliation_string":"Kuaishou Inc., Beijing, China","institution_ids":[]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5103963648","display_name":"Jin Ouyang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Jin Ouyang","raw_affiliation_strings":["Kuaishou Inc., Beijing, China"],"raw_orcid":"https://orcid.org/0009-0003-6591-7717","affiliations":[{"raw_affiliation_string":"Kuaishou Inc., Beijing, China","institution_ids":[]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":13,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.65754957,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"847","last_page":"859"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.454800009727478,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.454800009727478,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10101","display_name":"Cloud Computing and Resource Management","score":0.1834000051021576,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T14347","display_name":"Big Data and Digital Economy","score":0.10180000215768814,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/interleaving","display_name":"Interleaving","score":0.5647000074386597},{"id":"https://openalex.org/keywords/job-scheduler","display_name":"Job scheduler","score":0.5101000070571899},{"id":"https://openalex.org/keywords/scheduling","display_name":"Scheduling (production processes)","score":0.5015000104904175},{"id":"https://openalex.org/keywords/cloud-computing","display_name":"Cloud computing","score":0.41119998693466187},{"id":"https://openalex.org/keywords/set","display_name":"Set (abstract data type)","score":0.41100001335144043},{"id":"https://openalex.org/keywords/execution-time","display_name":"Execution time","score":0.38589999079704285},{"id":"https://openalex.org/keywords/matching","display_name":"Matching (statistics)","score":0.382099986076355}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8432000279426575},{"id":"https://openalex.org/C28034677","wikidata":"https://www.wikidata.org/wiki/Q17092530","display_name":"Interleaving","level":2,"score":0.5647000074386597},{"id":"https://openalex.org/C111873713","wikidata":"https://www.wikidata.org/wiki/Q1641413","display_name":"Job scheduler","level":3,"score":0.5101000070571899},{"id":"https://openalex.org/C206729178","wikidata":"https://www.wikidata.org/wiki/Q2271896","display_name":"Scheduling (production processes)","level":2,"score":0.5015000104904175},{"id":"https://openalex.org/C120314980","wikidata":"https://www.wikidata.org/wiki/Q180634","display_name":"Distributed computing","level":1,"score":0.482699990272522},{"id":"https://openalex.org/C79974875","wikidata":"https://www.wikidata.org/wiki/Q483639","display_name":"Cloud computing","level":2,"score":0.41119998693466187},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.41100001335144043},{"id":"https://openalex.org/C2989134064","wikidata":"https://www.wikidata.org/wiki/Q288510","display_name":"Execution time","level":2,"score":0.38589999079704285},{"id":"https://openalex.org/C165064840","wikidata":"https://www.wikidata.org/wiki/Q1321061","display_name":"Matching (statistics)","level":2,"score":0.382099986076355},{"id":"https://openalex.org/C192209626","wikidata":"https://www.wikidata.org/wiki/Q190909","display_name":"Focus (optics)","level":2,"score":0.36489999294281006},{"id":"https://openalex.org/C206345919","wikidata":"https://www.wikidata.org/wiki/Q20380951","display_name":"Resource (disambiguation)","level":2,"score":0.3483000099658966},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.3084000051021576},{"id":"https://openalex.org/C51332947","wikidata":"https://www.wikidata.org/wiki/Q1172305","display_name":"Shared resource","level":2,"score":0.27880001068115234},{"id":"https://openalex.org/C2778119891","wikidata":"https://www.wikidata.org/wiki/Q477690","display_name":"CUDA","level":2,"score":0.27709999680519104},{"id":"https://openalex.org/C55416958","wikidata":"https://www.wikidata.org/wiki/Q6206757","display_name":"Job shop scheduling","level":3,"score":0.26919999718666077},{"id":"https://openalex.org/C171627638","wikidata":"https://www.wikidata.org/wiki/Q6206744","display_name":"Job queue","level":4,"score":0.26080000400543213},{"id":"https://openalex.org/C2778583658","wikidata":"https://www.wikidata.org/wiki/Q849415","display_name":"On-the-job training","level":3,"score":0.2531000077724457},{"id":"https://openalex.org/C79403827","wikidata":"https://www.wikidata.org/wiki/Q3988","display_name":"Real-time computing","level":1,"score":0.25270000100135803}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3772052.3772266","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3772052.3772266","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2025 ACM Symposium on Cloud Computing","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"display_name":"Decent work and economic growth","score":0.7210636734962463,"id":"https://metadata.un.org/sdg/8"}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":16,"referenced_works":["https://openalex.org/W2194775991","https://openalex.org/W2335814492","https://openalex.org/W2767239597","https://openalex.org/W2919897868","https://openalex.org/W3012536640","https://openalex.org/W3047783725","https://openalex.org/W3108033633","https://openalex.org/W3130554079","https://openalex.org/W3157306683","https://openalex.org/W3197816522","https://openalex.org/W4290991121","https://openalex.org/W4316252403","https://openalex.org/W4318541537","https://openalex.org/W4387321109","https://openalex.org/W4390263770","https://openalex.org/W4394969374"],"related_works":[],"abstract_inverted_index":{"The":[0,71,154],"growing":[1],"scale":[2],"and":[3,34,64,116,132,144,189,203,223],"heterogeneity":[4,149],"of":[5,31,48,107,121,127,147,232],"GPU":[6,23,80,109,148,243],"clusters":[7,244],"pose":[8],"new":[9],"challenges":[10],"to":[11,51,67,76,179,229],"deep":[12,97],"learning":[13,98],"(DL)":[14],"job":[15,72,152,155,182,206],"scheduling.":[16],"While":[17,43],"existing":[18],"schedulers":[19],"primarily":[20],"focus":[21],"on":[22,150,221,240],"utilization,":[24],"they":[25],"often":[26],"ignore":[27],"multi-dimensional":[28,112],"resource":[29,53,62],"demands":[30],"DL":[32],"workloads":[33],"lack":[35],"precise":[36],"execution":[37,50,126,136],"time":[38,137,208],"estimation":[39,138],"for":[40],"co-located":[41],"jobs.":[42,122],"Muri":[44],"pioneered":[45],"the":[46,68,124,140,145,151,169,180,214],"use":[47],"interleaving":[49,125],"improve":[52],"efficiency,":[54],"it":[55],"simplified":[56],"interference":[57,143],"when":[58,78],"jobs":[59,99,129,235],"using":[60],"one":[61],"simultaneously":[63],"is":[65,130,157,165,218],"agnostic":[66],"deadline":[69,102,198],"constraints.":[70],"grouping":[73],"also":[74],"comes":[75],"suboptimal":[77],"heterogeneous":[79,108,177,242],"devices":[81,110],"are":[82,114],"taken":[83],"into":[84],"account.":[85],"In":[86],"this":[87],"paper,":[88],"we":[89],"propose":[90],"Cuckoo,":[91],"a":[92,105,119,160,185],"scheduling":[93],"system":[94],"that":[95,195,236],"packs":[96],"with":[100,213],"stringent":[101],"requirements":[103],"over":[104],"set":[106],"where":[111],"resources":[113,178],"interleaved":[115,239],"shared":[117,241],"by":[118,168,201,210],"group":[120],"Specifically,":[123],"simultaneous":[128],"characterized":[131],"modeled":[133],"through":[134,184],"stage-grained":[135],"considering":[139],"runtime":[141],"performance":[142],"impact":[146],"performance.":[153],"packing":[156],"formulated":[158],"as":[159],"multi-objective":[161],"optimization":[162],"problem":[163],"which":[164],"then":[166,175],"solved":[167],"maximum":[170,187],"weight":[171],"matching":[172],"algorithm.":[173,192],"Cuckoo":[174,196,217],"allocates":[176],"packed":[181],"groups":[183],"graph-based":[186],"flow":[188],"minimum":[190],"cut":[191],"Experiments":[193],"show":[194],"improves":[197],"satisfaction":[199],"rate":[200],"2.38x":[202],"reduces":[204],"average":[205],"completion":[207],"(JCT)":[209],"1.81x":[211],"compared":[212],"state-of-the-art":[215],"approaches.":[216],"implemented":[219],"based":[220],"Kubernetes":[222],"has":[224],"been":[225],"deployed":[226],"in":[227],"Kuaishou":[228],"serve":[230],"thousands":[231],"model":[233],"training":[234],"can":[237],"be":[238]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2026-01-14T00:00:00"}
