{"id":"https://openalex.org/W4409060284","doi":"https://doi.org/10.1145/3721146.3721937","title":"Manage the Workloads not the Cluster: Designing a Control Plane for Large-Scale AI Clusters","display_name":"Manage the Workloads not the Cluster: Designing a Control Plane for Large-Scale AI Clusters","publication_year":2025,"publication_date":"2025-03-30","ids":{"openalex":"https://openalex.org/W4409060284","doi":"https://doi.org/10.1145/3721146.3721937"},"language":"en","primary_location":{"id":"doi:10.1145/3721146.3721937","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3721146.3721937","pdf_url":null,"source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 5th Workshop on Machine Learning and Systems","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://doi.org/10.1145/3721146.3721937","any_repository_has_fulltext":null},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5018392500","display_name":"Ruiqi Lai","orcid":"https://orcid.org/0009-0001-6067-0976"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Ruiqi Lai","raw_affiliation_strings":["NTU Singapore, Singapore, Singapore"],"raw_orcid":"https://orcid.org/0009-0001-6067-0976","affiliations":[{"raw_affiliation_string":"NTU Singapore, Singapore, Singapore","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5111028335","display_name":"Siyu Cao","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Siyu Cao","raw_affiliation_strings":["NTU Singapore, Singapore, Singapore"],"raw_orcid":"https://orcid.org/0009-0001-9593-2761","affiliations":[{"raw_affiliation_string":"NTU Singapore, Singapore, Singapore","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5048783431","display_name":"L. Li","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Leqi Li","raw_affiliation_strings":["NTU Singapore, Singapore, Singapore"],"raw_orcid":"https://orcid.org/0009-0002-0317-1027","affiliations":[{"raw_affiliation_string":"NTU Singapore, Singapore, Singapore","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5034207820","display_name":"Luo Mai","orcid":"https://orcid.org/0000-0002-3594-1092"},"institutions":[{"id":"https://openalex.org/I98677209","display_name":"University of Edinburgh","ror":"https://ror.org/01nrxwf90","country_code":"GB","type":"education","lineage":["https://openalex.org/I98677209"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Luo Mai","raw_affiliation_strings":["University of Edinburgh, Edinburgh, United Kingdom"],"raw_orcid":"https://orcid.org/0000-0002-3594-1092","affiliations":[{"raw_affiliation_string":"University of Edinburgh, Edinburgh, United Kingdom","institution_ids":["https://openalex.org/I98677209"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5083116172","display_name":"Dmitrii Ustiugov","orcid":"https://orcid.org/0000-0003-3156-010X"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Dmitrii Ustiugov","raw_affiliation_strings":["NTU Singapore, Singapore, Singapore"],"raw_orcid":"https://orcid.org/0000-0003-3156-010X","affiliations":[{"raw_affiliation_string":"NTU Singapore, Singapore, Singapore","institution_ids":[]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":4.7179,"has_fulltext":false,"cited_by_count":2,"citation_normalized_percentile":{"value":0.93902608,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":98,"max":99},"biblio":{"volume":null,"issue":null,"first_page":"246","last_page":"253"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10101","display_name":"Cloud Computing and Resource Management","score":0.9954000115394592,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10101","display_name":"Cloud Computing and Resource Management","score":0.9954000115394592,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11986","display_name":"Scientific Computing and Data Management","score":0.9905999898910522,"subfield":{"id":"https://openalex.org/subfields/1802","display_name":"Information Systems and Management"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T12292","display_name":"Graph Theory and Algorithms","score":0.9451000094413757,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/cluster","display_name":"Cluster (spacecraft)","score":0.7346653342247009},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6921365857124329},{"id":"https://openalex.org/keywords/scale","display_name":"Scale (ratio)","score":0.6053696870803833},{"id":"https://openalex.org/keywords/operating-system","display_name":"Operating system","score":0.20967552065849304},{"id":"https://openalex.org/keywords/geography","display_name":"Geography","score":0.07209175825119019}],"concepts":[{"id":"https://openalex.org/C164866538","wikidata":"https://www.wikidata.org/wiki/Q367351","display_name":"Cluster (spacecraft)","level":2,"score":0.7346653342247009},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6921365857124329},{"id":"https://openalex.org/C2778755073","wikidata":"https://www.wikidata.org/wiki/Q10858537","display_name":"Scale (ratio)","level":2,"score":0.6053696870803833},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.20967552065849304},{"id":"https://openalex.org/C205649164","wikidata":"https://www.wikidata.org/wiki/Q1071","display_name":"Geography","level":0,"score":0.07209175825119019},{"id":"https://openalex.org/C58640448","wikidata":"https://www.wikidata.org/wiki/Q42515","display_name":"Cartography","level":1,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1145/3721146.3721937","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3721146.3721937","pdf_url":null,"source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 5th Workshop on Machine Learning and Systems","raw_type":"proceedings-article"},{"id":"pmh:oai:dr.ntu.edu.sg:10356/201480","is_oa":false,"landing_page_url":"https://hdl.handle.net/10356/201480","pdf_url":null,"source":{"id":"https://openalex.org/S4306402609","display_name":"DR-NTU (Nanyang Technological University)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I172675005","host_organization_name":"Nanyang Technological University","host_organization_lineage":["https://openalex.org/I172675005"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":null,"raw_type":"Conference Paper"}],"best_oa_location":{"id":"doi:10.1145/3721146.3721937","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3721146.3721937","pdf_url":null,"source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 5th Workshop on Machine Learning and Systems","raw_type":"proceedings-article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":5,"referenced_works":["https://openalex.org/W2141992894","https://openalex.org/W4313458650","https://openalex.org/W4381611640","https://openalex.org/W4387321091","https://openalex.org/W4395106452"],"related_works":["https://openalex.org/W4391375266","https://openalex.org/W2899084033","https://openalex.org/W2748952813","https://openalex.org/W2390279801","https://openalex.org/W4391913857","https://openalex.org/W2358668433","https://openalex.org/W4396701345","https://openalex.org/W2376932109","https://openalex.org/W2001405890","https://openalex.org/W4396696052"],"abstract_inverted_index":{"The":[0],"rapid":[1,189],"adoption":[2],"of":[3,75,125,139,155,160],"large":[4],"language":[5],"model":[6,186],"(LLM)":[7],"services,":[8],"such":[9],"as":[10,56,60,96],"ChatGPT":[11],"and":[12,40,47,63,150,170,196],"DeepSeek,":[13],"has":[14],"created":[15],"unprecedented":[16],"demand":[17],"for":[18],"computational":[19],"resources,":[20],"particularly":[21],"on":[22],"accelerator-equipped":[23],"clusters":[24],"(e.g.,":[25],"GPUs,":[26],"NPUs).":[27],"These":[28],"workloads":[29],"present":[30],"unique":[31],"challenges":[32],"due":[33],"to":[34,72,120,165],"their":[35],"highly":[36],"dynamic":[37],"traffic":[38],"patterns":[39],"multi-dimensional":[41,147],"resource":[42,79,137,148,153,190],"demands,":[43],"including":[44],"power,":[45],"memory,":[46],"computing.":[48],"Existing":[49],"GPU":[50,167],"cluster":[51,94,112,168],"management":[52,95],"systems":[53],"fall":[54],"short,":[55],"they":[57],"treat":[58],"accelerators":[59],"monolithic":[61],"units":[62],"allocate":[64],"resources":[65,128,145],"once":[66],"at":[67],"the":[68,76,82,91,101,111,117,122,130,133],"placement":[69],"time,":[70],"leading":[71],"imbalanced":[73],"utilization":[74,123,154,169],"above":[77],"three":[78,127],"types":[80],"across":[81,132],"cluster.":[83,134],"To":[84],"address":[85],"these":[86],"issues,":[87],"we":[88],"propose":[89],"redefining":[90],"LLM":[92,140,156,176],"serving":[93],"a":[97],"bin-packing":[98],"problem,":[99],"where":[100],"resource-specific":[102],"budgets":[103,149],"abstract":[104],"away":[105,143],"hardware":[106,144,161],"resources.":[107,162],"We":[108],"introduce":[109],"Shapeshifter,":[110],"manager":[113],"that":[114],"dynamically":[115],"adjusts":[116],"workload":[118,157],"deployments":[119],"balance":[121],"levels":[124],"all":[126],"in":[129],"GPUs":[131],"Shapeshifter":[135],"monitors":[136],"demands":[138],"workload,":[141],"abstracts":[142],"with":[146],"continuously":[151],"re-balances":[152],"before":[158],"allocation":[159],"ShapeShifter":[163],"aims":[164],"increase":[166],"deployment":[171],"density":[172],"while":[173],"delivering":[174],"high-quality":[175],"inference":[177],"serving.":[178],"Key":[179],"future":[180],"research":[181],"directions":[182],"include":[183],"exploring":[184,188],"multidimensional":[185],"placement,":[187],"rebalancing":[191],"mechanisms":[192],"without":[193],"service":[194],"disruption,":[195],"efficient":[197],"scheduler":[198],"policy":[199],"design.":[200]},"counts_by_year":[{"year":2026,"cited_by_count":2}],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2025-10-10T00:00:00"}
