{"id":"https://openalex.org/W7123898572","doi":"https://doi.org/10.1145/3772052.3772229","title":"ZipBatch: Multi-Tenant GPU Batching with Dual-Resource Regulation","display_name":"ZipBatch: Multi-Tenant GPU Batching with Dual-Resource Regulation","publication_year":2025,"publication_date":"2025-11-19","ids":{"openalex":"https://openalex.org/W7123898572","doi":"https://doi.org/10.1145/3772052.3772229"},"language":"en","primary_location":{"id":"doi:10.1145/3772052.3772229","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3772052.3772229","pdf_url":null,"source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2025 ACM Symposium on Cloud Computing","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://doi.org/10.1145/3772052.3772229","any_repository_has_fulltext":null},"authorships":[{"author_position":"first","author":{"id":null,"display_name":"Haoxuan Yu","orcid":"https://orcid.org/0009-0008-9677-7960"},"institutions":[{"id":"https://openalex.org/I200769079","display_name":"Hong Kong University of Science and Technology","ror":"https://ror.org/00q4vv597","country_code":"HK","type":"education","lineage":["https://openalex.org/I200769079"]}],"countries":["HK"],"is_corresponding":false,"raw_author_name":"Haoxuan Yu","raw_affiliation_strings":["Hong Kong University of Science and Technology, Hong Kong, Hong Kong"],"raw_orcid":"https://orcid.org/0009-0008-9677-7960","affiliations":[{"raw_affiliation_string":"Hong Kong University of Science and Technology, Hong Kong, Hong Kong","institution_ids":["https://openalex.org/I200769079"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5122952170","display_name":"Sheng Yao","orcid":null},"institutions":[{"id":"https://openalex.org/I200769079","display_name":"Hong Kong University of Science and Technology","ror":"https://ror.org/00q4vv597","country_code":"HK","type":"education","lineage":["https://openalex.org/I200769079"]}],"countries":["HK"],"is_corresponding":false,"raw_author_name":"Sheng Yao","raw_affiliation_strings":["Hong Kong University of Science and Technology, Hong Kong, Hong Kong"],"raw_orcid":"https://orcid.org/0009-0003-5143-8352","affiliations":[{"raw_affiliation_string":"Hong Kong University of Science and Technology, Hong Kong, Hong Kong","institution_ids":["https://openalex.org/I200769079"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5122971395","display_name":"Wei Wang","orcid":null},"institutions":[{"id":"https://openalex.org/I200769079","display_name":"Hong Kong University of Science and Technology","ror":"https://ror.org/00q4vv597","country_code":"HK","type":"education","lineage":["https://openalex.org/I200769079"]}],"countries":["HK"],"is_corresponding":false,"raw_author_name":"Wei Wang","raw_affiliation_strings":["Hong Kong University of Science and Technology, Hong Kong, Hong Kong"],"raw_orcid":"https://orcid.org/0000-0002-4585-4152","affiliations":[{"raw_affiliation_string":"Hong Kong University of Science and Technology, Hong Kong, Hong Kong","institution_ids":["https://openalex.org/I200769079"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.65937977,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"240","last_page":"254"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.9153000116348267,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.9153000116348267,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10101","display_name":"Cloud Computing and Resource Management","score":0.026200000196695328,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T14347","display_name":"Big Data and Digital Economy","score":0.015200000256299973,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/burstiness","display_name":"Burstiness","score":0.8331000208854675},{"id":"https://openalex.org/keywords/multiplexing","display_name":"Multiplexing","score":0.6481000185012817},{"id":"https://openalex.org/keywords/throughput","display_name":"Throughput","score":0.5964999794960022},{"id":"https://openalex.org/keywords/bandwidth","display_name":"Bandwidth (computing)","score":0.5198000073432922},{"id":"https://openalex.org/keywords/kernel","display_name":"Kernel (algebra)","score":0.46209999918937683},{"id":"https://openalex.org/keywords/inference","display_name":"Inference","score":0.45829999446868896},{"id":"https://openalex.org/keywords/memory-bandwidth","display_name":"Memory bandwidth","score":0.41019999980926514},{"id":"https://openalex.org/keywords/resource","display_name":"Resource (disambiguation)","score":0.40049999952316284},{"id":"https://openalex.org/keywords/resource-allocation","display_name":"Resource allocation","score":0.38420000672340393}],"concepts":[{"id":"https://openalex.org/C2781023610","wikidata":"https://www.wikidata.org/wiki/Q17006304","display_name":"Burstiness","level":3,"score":0.8331000208854675},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.824999988079071},{"id":"https://openalex.org/C19275194","wikidata":"https://www.wikidata.org/wiki/Q222903","display_name":"Multiplexing","level":2,"score":0.6481000185012817},{"id":"https://openalex.org/C157764524","wikidata":"https://www.wikidata.org/wiki/Q1383412","display_name":"Throughput","level":3,"score":0.5964999794960022},{"id":"https://openalex.org/C2776257435","wikidata":"https://www.wikidata.org/wiki/Q1576430","display_name":"Bandwidth (computing)","level":2,"score":0.5198000073432922},{"id":"https://openalex.org/C74193536","wikidata":"https://www.wikidata.org/wiki/Q574844","display_name":"Kernel (algebra)","level":2,"score":0.46209999918937683},{"id":"https://openalex.org/C2776214188","wikidata":"https://www.wikidata.org/wiki/Q408386","display_name":"Inference","level":2,"score":0.45829999446868896},{"id":"https://openalex.org/C120314980","wikidata":"https://www.wikidata.org/wiki/Q180634","display_name":"Distributed computing","level":1,"score":0.4521999955177307},{"id":"https://openalex.org/C188045654","wikidata":"https://www.wikidata.org/wiki/Q17148339","display_name":"Memory bandwidth","level":2,"score":0.41019999980926514},{"id":"https://openalex.org/C206345919","wikidata":"https://www.wikidata.org/wiki/Q20380951","display_name":"Resource (disambiguation)","level":2,"score":0.40049999952316284},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.3856000006198883},{"id":"https://openalex.org/C79403827","wikidata":"https://www.wikidata.org/wiki/Q3988","display_name":"Real-time computing","level":1,"score":0.38530001044273376},{"id":"https://openalex.org/C29202148","wikidata":"https://www.wikidata.org/wiki/Q287260","display_name":"Resource allocation","level":2,"score":0.38420000672340393},{"id":"https://openalex.org/C163258240","wikidata":"https://www.wikidata.org/wiki/Q25342","display_name":"Power (physics)","level":2,"score":0.3637000024318695},{"id":"https://openalex.org/C32022120","wikidata":"https://www.wikidata.org/wiki/Q797225","display_name":"Interference (communication)","level":3,"score":0.3544999957084656},{"id":"https://openalex.org/C173512123","wikidata":"https://www.wikidata.org/wiki/Q5978010","display_name":"Statistical time division multiplexing","level":3,"score":0.3531000018119812},{"id":"https://openalex.org/C31258907","wikidata":"https://www.wikidata.org/wiki/Q1301371","display_name":"Computer network","level":1,"score":0.3407999873161316},{"id":"https://openalex.org/C50661577","wikidata":"https://www.wikidata.org/wiki/Q901831","display_name":"Time-division multiplexing","level":3,"score":0.31700000166893005},{"id":"https://openalex.org/C176649486","wikidata":"https://www.wikidata.org/wiki/Q2308807","display_name":"Memory management","level":3,"score":0.31360000371932983},{"id":"https://openalex.org/C206729178","wikidata":"https://www.wikidata.org/wiki/Q2271896","display_name":"Scheduling (production processes)","level":2,"score":0.31360000371932983},{"id":"https://openalex.org/C22684755","wikidata":"https://www.wikidata.org/wiki/Q847526","display_name":"Queueing theory","level":2,"score":0.30489999055862427},{"id":"https://openalex.org/C2780609101","wikidata":"https://www.wikidata.org/wiki/Q17156588","display_name":"Resource management (computing)","level":2,"score":0.2922999858856201},{"id":"https://openalex.org/C2984118289","wikidata":"https://www.wikidata.org/wiki/Q29954","display_name":"Power consumption","level":3,"score":0.2892000079154968},{"id":"https://openalex.org/C4679612","wikidata":"https://www.wikidata.org/wiki/Q866298","display_name":"Aggregate (composite)","level":2,"score":0.2806999981403351},{"id":"https://openalex.org/C3017813396","wikidata":"https://www.wikidata.org/wiki/Q17078173","display_name":"Resource constraints","level":2,"score":0.2786000072956085},{"id":"https://openalex.org/C158379750","wikidata":"https://www.wikidata.org/wiki/Q214111","display_name":"Network packet","level":2,"score":0.25769999623298645},{"id":"https://openalex.org/C149635348","wikidata":"https://www.wikidata.org/wiki/Q193040","display_name":"Embedded system","level":1,"score":0.25540000200271606}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1145/3772052.3772229","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3772052.3772229","pdf_url":null,"source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2025 ACM Symposium on Cloud Computing","raw_type":"proceedings-article"},{"id":"pmh:oai:repository.hkust.edu.hk:1783.1-168725","is_oa":false,"landing_page_url":"http://repository.hkust.edu.hk/ir/Record/1783.1-168725","pdf_url":null,"source":{"id":"https://openalex.org/S4306401796","display_name":"Rare & Special e-Zone (The Hong Kong University of Science and Technology)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I200769079","host_organization_name":"Hong Kong University of Science and Technology","host_organization_lineage":["https://openalex.org/I200769079"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Conference paper"}],"best_oa_location":{"id":"doi:10.1145/3772052.3772229","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3772052.3772229","pdf_url":null,"source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2025 ACM Symposium on Cloud Computing","raw_type":"proceedings-article"},"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/8","score":0.480445921421051,"display_name":"Decent work and economic growth"}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":23,"referenced_works":["https://openalex.org/W1506474685","https://openalex.org/W1901129140","https://openalex.org/W2194775991","https://openalex.org/W2581065617","https://openalex.org/W2604787577","https://openalex.org/W2896457183","https://openalex.org/W2918317383","https://openalex.org/W3094502228","https://openalex.org/W3158444059","https://openalex.org/W4234578094","https://openalex.org/W4318541676","https://openalex.org/W4327810158","https://openalex.org/W4360831842","https://openalex.org/W4385889887","https://openalex.org/W4387321091","https://openalex.org/W4387321503","https://openalex.org/W4388031348","https://openalex.org/W4403364019","https://openalex.org/W4403783293","https://openalex.org/W4404787869","https://openalex.org/W4408867174","https://openalex.org/W4408867487","https://openalex.org/W4414735877"],"related_works":[],"abstract_inverted_index":{"GPU":[0,7,37],"multiplexing":[1],"is":[2],"a":[3,36],"widely-adopted":[4],"strategy":[5],"in":[6,24,82],"clusters":[8],"for":[9],"improving":[10],"overall":[11],"throughput":[12],"and":[13,27,54,85],"lowering":[14],"the":[15,49,63,72],"total":[16],"cost":[17],"of":[18],"ownership.":[19],"To":[20],"mitigate":[21],"inter-task":[22],"interference":[23],"compute":[25,83],"power":[26,84],"memory":[28,86],"bandwidth":[29],"on":[30],"multiplexed":[31],"GPUs,":[32],"existing":[33],"techniques":[34],"divide":[35],"into":[38],"instances":[39],"with":[40],"limited":[41],"predefined":[42],"rigid":[43],"configurations.":[44],"Low":[45],"utilization":[46,78],"arises":[47],"from":[48],"mismatch":[50],"between":[51],"heterogeneous":[52],"burstiness":[53],"immutable":[55],"resource":[56,77],"configurations:":[57],"1)":[58],"bursty":[59,75],"inference":[60],"traffic":[61],"forces":[62],"scheduler":[64],"to":[65,80],"launch":[66],"underfilled":[67],"batches":[68],"that":[69],"cannot":[70],"saturate":[71],"instance;":[73],"2)":[74],"kernel":[76],"leads":[79],"bubbles":[81],"bandwidth.":[87]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2026-01-14T00:00:00"}
