{"id":"https://openalex.org/W4417529551","doi":"https://doi.org/10.48550/arxiv.2512.16099","title":"An Online Fragmentation-Aware Scheduler for Managing GPU-Sharing Workloads on Multi-Instance GPUs","display_name":"An Online Fragmentation-Aware Scheduler for Managing GPU-Sharing Workloads on Multi-Instance GPUs","publication_year":2025,"publication_date":"2025-12-18","ids":{"openalex":"https://openalex.org/W4417529551","doi":"https://doi.org/10.48550/arxiv.2512.16099"},"language":null,"primary_location":{"id":"pmh:oai:arXiv.org:2512.16099","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2512.16099","pdf_url":"https://arxiv.org/pdf/2512.16099","source":{"id":"https://openalex.org/S4393918464","display_name":"ArXiv.org","issn_l":"2331-8422","issn":["2331-8422"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},"type":"preprint","indexed_in":["arxiv","datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/2512.16099","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5113098609","display_name":"Hsu-Tzu Ting","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Ting, Hsu-Tzu","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5005498754","display_name":"Jerry Chou","orcid":"https://orcid.org/0000-0001-7851-1140"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Chou, Jerry","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5089889150","display_name":"Ming-Hung Chen","orcid":"https://orcid.org/0000-0002-6946-2313"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Chen, Ming-Hung","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5108479300","display_name":"I\u2010Hsin Chung","orcid":"https://orcid.org/0000-0003-4555-9257"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Chung, I-Hsin","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5113098609"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.6996999979019165,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.6996999979019165,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10101","display_name":"Cloud Computing and Resource Management","score":0.1062999963760376,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11181","display_name":"Advanced Data Storage Technologies","score":0.09809999912977219,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/scheduling","display_name":"Scheduling (production processes)","score":0.6017000079154968},{"id":"https://openalex.org/keywords/pci-express","display_name":"PCI Express","score":0.5813000202178955},{"id":"https://openalex.org/keywords/job-shop-scheduling","display_name":"Job shop scheduling","score":0.4503999948501587},{"id":"https://openalex.org/keywords/fragmentation","display_name":"Fragmentation (computing)","score":0.4359000027179718},{"id":"https://openalex.org/keywords/shared-resource","display_name":"Shared resource","score":0.43160000443458557},{"id":"https://openalex.org/keywords/exploit","display_name":"Exploit","score":0.37720000743865967},{"id":"https://openalex.org/keywords/job-scheduler","display_name":"Job scheduler","score":0.3734999895095825},{"id":"https://openalex.org/keywords/resource","display_name":"Resource (disambiguation)","score":0.3625999987125397}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8730999827384949},{"id":"https://openalex.org/C206729178","wikidata":"https://www.wikidata.org/wiki/Q2271896","display_name":"Scheduling (production processes)","level":2,"score":0.6017000079154968},{"id":"https://openalex.org/C64270927","wikidata":"https://www.wikidata.org/wiki/Q206924","display_name":"PCI Express","level":3,"score":0.5813000202178955},{"id":"https://openalex.org/C120314980","wikidata":"https://www.wikidata.org/wiki/Q180634","display_name":"Distributed computing","level":1,"score":0.5724999904632568},{"id":"https://openalex.org/C55416958","wikidata":"https://www.wikidata.org/wiki/Q6206757","display_name":"Job shop scheduling","level":3,"score":0.4503999948501587},{"id":"https://openalex.org/C191015642","wikidata":"https://www.wikidata.org/wiki/Q1132459","display_name":"Fragmentation (computing)","level":2,"score":0.4359000027179718},{"id":"https://openalex.org/C51332947","wikidata":"https://www.wikidata.org/wiki/Q1172305","display_name":"Shared resource","level":2,"score":0.43160000443458557},{"id":"https://openalex.org/C165696696","wikidata":"https://www.wikidata.org/wiki/Q11287","display_name":"Exploit","level":2,"score":0.37720000743865967},{"id":"https://openalex.org/C111873713","wikidata":"https://www.wikidata.org/wiki/Q1641413","display_name":"Job scheduler","level":3,"score":0.3734999895095825},{"id":"https://openalex.org/C206345919","wikidata":"https://www.wikidata.org/wiki/Q20380951","display_name":"Resource (disambiguation)","level":2,"score":0.3625999987125397},{"id":"https://openalex.org/C171627638","wikidata":"https://www.wikidata.org/wiki/Q6206744","display_name":"Job queue","level":4,"score":0.3619000017642975},{"id":"https://openalex.org/C2775941552","wikidata":"https://www.wikidata.org/wiki/Q25212305","display_name":"Isolation (microbiology)","level":2,"score":0.3587000072002411},{"id":"https://openalex.org/C2989134064","wikidata":"https://www.wikidata.org/wiki/Q288510","display_name":"Execution time","level":2,"score":0.35679998993873596},{"id":"https://openalex.org/C2984822820","wikidata":"https://www.wikidata.org/wiki/Q1123036","display_name":"Processor scheduling","level":3,"score":0.33649998903274536},{"id":"https://openalex.org/C38725249","wikidata":"https://www.wikidata.org/wiki/Q913876","display_name":"Time-sharing","level":2,"score":0.3158999979496002},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.31200000643730164},{"id":"https://openalex.org/C2983523559","wikidata":"https://www.wikidata.org/wiki/Q410657","display_name":"On demand","level":2,"score":0.2971999943256378},{"id":"https://openalex.org/C18762648","wikidata":"https://www.wikidata.org/wiki/Q42213","display_name":"Work (physics)","level":2,"score":0.2847000062465668},{"id":"https://openalex.org/C142355369","wikidata":"https://www.wikidata.org/wiki/Q7698919","display_name":"Temporal isolation among virtual machines","level":4,"score":0.26420000195503235}],"mesh":[],"locations_count":2,"locations":[{"id":"pmh:oai:arXiv.org:2512.16099","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2512.16099","pdf_url":"https://arxiv.org/pdf/2512.16099","source":{"id":"https://openalex.org/S4393918464","display_name":"ArXiv.org","issn_l":"2331-8422","issn":["2331-8422"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},{"id":"doi:10.48550/arxiv.2512.16099","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2512.16099","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:2512.16099","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2512.16099","pdf_url":"https://arxiv.org/pdf/2512.16099","source":{"id":"https://openalex.org/S4393918464","display_name":"ArXiv.org","issn_l":"2331-8422","issn":["2331-8422"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Modern":[0],"GPU":[1,25,34,56,68,131],"workloads":[2],"increasingly":[3],"demand":[4],"efficient":[5],"resource":[6,29,44],"sharing,":[7],"as":[8,52],"many":[9],"jobs":[10],"do":[11],"not":[12,82],"require":[13],"the":[14,155],"full":[15],"capacity":[16],"of":[17,76],"a":[18,59],"GPU.":[19],"Among":[20],"sharing":[21,69],"techniques,":[22],"NVIDIA's":[23],"Multi-Instance":[24],"(MIG)":[26],"offers":[27],"strong":[28],"isolation":[30],"by":[31,158],"enabling":[32],"hardware-level":[33],"partitioning.":[35],"However,":[36],"leveraging":[37],"MIG":[38,78],"effectively":[39],"introduces":[40],"new":[41],"challenges.":[42],"First,":[43],"contention":[45,128],"persists":[46],"due":[47,71],"to":[48,72,126,133,160],"shared":[49],"components":[50],"such":[51],"PCIe":[53],"bandwidth.":[54],"Second,":[55],"fragmentation":[57],"becomes":[58],"critical":[60],"issue,":[61],"which":[62],"is":[63],"different":[64],"from":[65,84,89],"prior":[66],"fine-grained":[67],"work":[70],"MIG's":[73],"limited":[74],"number":[75],"valid":[77],"configurations.":[79],"Fragmentation":[80],"arises":[81],"only":[83],"spatial":[85],"discontinuity":[86],"but":[87],"also":[88],"rigid":[90],"profile":[91],"placement":[92,125],"constraints,":[93],"especially":[94],"after":[95],"job":[96,118,124],"arrivals":[97],"and":[98,117,129,137],"terminations.":[99],"To":[100],"address":[101],"these":[102],"issues,":[103],"we":[104],"propose":[105],"an":[106],"online":[107],"scheduling":[108],"framework":[109],"that":[110,143],"integrates":[111],"conditional":[112],"load":[113],"balancing,":[114],"dynamic":[115],"partitioning,":[116],"migration.":[119],"Our":[120],"approach":[121],"dynamically":[122],"adapts":[123],"minimize":[127],"reorganizes":[130],"allocations":[132],"combat":[134],"both":[135],"internal":[136],"external":[138],"fragmentation.":[139],"Experimental":[140],"results":[141],"show":[142],"our":[144],"method":[145],"significantly":[146],"improves":[147,157],"system":[148],"efficiency.":[149],"When":[150],"all":[151],"techniques":[152],"are":[153],"applied,":[154],"makespan":[156],"up":[159],"35%.":[161]},"counts_by_year":[],"updated_date":"2026-03-07T16:01:11.037858","created_date":"2025-12-21T00:00:00"}
