{"id":"https://openalex.org/W4416203719","doi":"https://doi.org/10.1145/3712285.3759857","title":"BOER: Enhancing Resource Utilization for Deep Learning Inference with Hybrid Spatial GPU Sharing","display_name":"BOER: Enhancing Resource Utilization for Deep Learning Inference with Hybrid Spatial GPU Sharing","publication_year":2025,"publication_date":"2025-11-12","ids":{"openalex":"https://openalex.org/W4416203719","doi":"https://doi.org/10.1145/3712285.3759857"},"language":null,"primary_location":{"id":"doi:10.1145/3712285.3759857","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3712285.3759857","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the International Conference for High Performance Computing, Networking, Storage and Analysis","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5092189732","display_name":"Bowen Zhang","orcid":"https://orcid.org/0009-0003-1130-1685"},"institutions":[{"id":"https://openalex.org/I3045169105","display_name":"Southern University of Science and Technology","ror":"https://ror.org/049tv2d57","country_code":"CN","type":"education","lineage":["https://openalex.org/I3045169105"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Bowen Zhang","raw_affiliation_strings":["Southern University of Science and Technology, Shenzhen, China"],"affiliations":[{"raw_affiliation_string":"Southern University of Science and Technology, Shenzhen, China","institution_ids":["https://openalex.org/I3045169105"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100594763","display_name":"Yuhang Wang","orcid":"https://orcid.org/0009-0002-0167-419X"},"institutions":[{"id":"https://openalex.org/I3045169105","display_name":"Southern University of Science and Technology","ror":"https://ror.org/049tv2d57","country_code":"CN","type":"education","lineage":["https://openalex.org/I3045169105"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yuhang Wang","raw_affiliation_strings":["Southern University of Science and Technology, Shenzhen, China"],"affiliations":[{"raw_affiliation_string":"Southern University of Science and Technology, Shenzhen, China","institution_ids":["https://openalex.org/I3045169105"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5087300252","display_name":"Zhuozhao Li","orcid":"https://orcid.org/0000-0003-1903-6428"},"institutions":[{"id":"https://openalex.org/I3045169105","display_name":"Southern University of Science and Technology","ror":"https://ror.org/049tv2d57","country_code":"CN","type":"education","lineage":["https://openalex.org/I3045169105"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Zhuozhao Li","raw_affiliation_strings":["Southern University of Science and Technology, Shenzhen, China"],"affiliations":[{"raw_affiliation_string":"Southern University of Science and Technology, Shenzhen, China","institution_ids":["https://openalex.org/I3045169105"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5092189732"],"corresponding_institution_ids":["https://openalex.org/I3045169105"],"apc_list":null,"apc_paid":null,"fwci":2.5505,"has_fulltext":false,"cited_by_count":1,"citation_normalized_percentile":{"value":0.91532986,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":91,"max":95},"biblio":{"volume":null,"issue":null,"first_page":"519","last_page":"532"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.5702999830245972,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.5702999830245972,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.14190000295639038,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10904","display_name":"Embedded Systems Design Techniques","score":0.04969999939203262,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/inference","display_name":"Inference","score":0.6396999955177307},{"id":"https://openalex.org/keywords/deep-learning","display_name":"Deep learning","score":0.5629000067710876},{"id":"https://openalex.org/keywords/scheduling","display_name":"Scheduling (production processes)","score":0.5547999739646912},{"id":"https://openalex.org/keywords/leverage","display_name":"Leverage (statistics)","score":0.5440000295639038},{"id":"https://openalex.org/keywords/testbed","display_name":"Testbed","score":0.5045999884605408},{"id":"https://openalex.org/keywords/bayesian-optimization","display_name":"Bayesian optimization","score":0.4790000021457672},{"id":"https://openalex.org/keywords/workflow","display_name":"Workflow","score":0.4404999911785126},{"id":"https://openalex.org/keywords/bayesian-inference","display_name":"Bayesian inference","score":0.43290001153945923},{"id":"https://openalex.org/keywords/multiplexing","display_name":"Multiplexing","score":0.40400001406669617},{"id":"https://openalex.org/keywords/key","display_name":"Key (lock)","score":0.362199991941452}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8101000189781189},{"id":"https://openalex.org/C2776214188","wikidata":"https://www.wikidata.org/wiki/Q408386","display_name":"Inference","level":2,"score":0.6396999955177307},{"id":"https://openalex.org/C120314980","wikidata":"https://www.wikidata.org/wiki/Q180634","display_name":"Distributed computing","level":1,"score":0.5993000268936157},{"id":"https://openalex.org/C108583219","wikidata":"https://www.wikidata.org/wiki/Q197536","display_name":"Deep learning","level":2,"score":0.5629000067710876},{"id":"https://openalex.org/C206729178","wikidata":"https://www.wikidata.org/wiki/Q2271896","display_name":"Scheduling (production processes)","level":2,"score":0.5547999739646912},{"id":"https://openalex.org/C153083717","wikidata":"https://www.wikidata.org/wiki/Q6535263","display_name":"Leverage (statistics)","level":2,"score":0.5440000295639038},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5284000039100647},{"id":"https://openalex.org/C31395832","wikidata":"https://www.wikidata.org/wiki/Q1318674","display_name":"Testbed","level":2,"score":0.5045999884605408},{"id":"https://openalex.org/C2778049539","wikidata":"https://www.wikidata.org/wiki/Q17002908","display_name":"Bayesian optimization","level":2,"score":0.4790000021457672},{"id":"https://openalex.org/C177212765","wikidata":"https://www.wikidata.org/wiki/Q627335","display_name":"Workflow","level":2,"score":0.4404999911785126},{"id":"https://openalex.org/C160234255","wikidata":"https://www.wikidata.org/wiki/Q812535","display_name":"Bayesian inference","level":3,"score":0.43290001153945923},{"id":"https://openalex.org/C19275194","wikidata":"https://www.wikidata.org/wiki/Q222903","display_name":"Multiplexing","level":2,"score":0.40400001406669617},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.3806999921798706},{"id":"https://openalex.org/C79403827","wikidata":"https://www.wikidata.org/wiki/Q3988","display_name":"Real-time computing","level":1,"score":0.375},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.362199991941452},{"id":"https://openalex.org/C42812","wikidata":"https://www.wikidata.org/wiki/Q1082910","display_name":"Partition (number theory)","level":2,"score":0.35030001401901245},{"id":"https://openalex.org/C2778476105","wikidata":"https://www.wikidata.org/wiki/Q628539","display_name":"Workload","level":2,"score":0.3476000130176544},{"id":"https://openalex.org/C118524514","wikidata":"https://www.wikidata.org/wiki/Q173212","display_name":"Computer architecture","level":1,"score":0.33570000529289246},{"id":"https://openalex.org/C119701452","wikidata":"https://www.wikidata.org/wiki/Q5165881","display_name":"Control reconfiguration","level":2,"score":0.33219999074935913},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.3310999870300293},{"id":"https://openalex.org/C2780513914","wikidata":"https://www.wikidata.org/wiki/Q18210350","display_name":"Bottleneck","level":2,"score":0.3287999927997589},{"id":"https://openalex.org/C48044578","wikidata":"https://www.wikidata.org/wiki/Q727490","display_name":"Scalability","level":2,"score":0.323199987411499},{"id":"https://openalex.org/C29202148","wikidata":"https://www.wikidata.org/wiki/Q287260","display_name":"Resource allocation","level":2,"score":0.31869998574256897},{"id":"https://openalex.org/C33724603","wikidata":"https://www.wikidata.org/wiki/Q812540","display_name":"Bayesian network","level":2,"score":0.3151000142097473},{"id":"https://openalex.org/C206345919","wikidata":"https://www.wikidata.org/wiki/Q20380951","display_name":"Resource (disambiguation)","level":2,"score":0.3140000104904175},{"id":"https://openalex.org/C157764524","wikidata":"https://www.wikidata.org/wiki/Q1383412","display_name":"Throughput","level":3,"score":0.31220000982284546},{"id":"https://openalex.org/C70437156","wikidata":"https://www.wikidata.org/wiki/Q7228652","display_name":"Pooling","level":2,"score":0.30730000138282776},{"id":"https://openalex.org/C113775141","wikidata":"https://www.wikidata.org/wiki/Q428691","display_name":"Computer engineering","level":1,"score":0.29980000853538513},{"id":"https://openalex.org/C107673813","wikidata":"https://www.wikidata.org/wiki/Q812534","display_name":"Bayesian probability","level":2,"score":0.29019999504089355},{"id":"https://openalex.org/C51332947","wikidata":"https://www.wikidata.org/wiki/Q1172305","display_name":"Shared resource","level":2,"score":0.2784999907016754},{"id":"https://openalex.org/C31258907","wikidata":"https://www.wikidata.org/wiki/Q1301371","display_name":"Computer network","level":1,"score":0.2754000127315521},{"id":"https://openalex.org/C2777826224","wikidata":"https://www.wikidata.org/wiki/Q5250404","display_name":"Deep integration","level":2,"score":0.263700008392334},{"id":"https://openalex.org/C2781235140","wikidata":"https://www.wikidata.org/wiki/Q275131","display_name":"Scratch","level":2,"score":0.2615000009536743},{"id":"https://openalex.org/C173801870","wikidata":"https://www.wikidata.org/wiki/Q201413","display_name":"Heuristic","level":2,"score":0.25769999623298645},{"id":"https://openalex.org/C9770341","wikidata":"https://www.wikidata.org/wiki/Q1938983","display_name":"Geospatial analysis","level":2,"score":0.2563000023365021},{"id":"https://openalex.org/C55416958","wikidata":"https://www.wikidata.org/wiki/Q6206757","display_name":"Job shop scheduling","level":3,"score":0.2535000145435333},{"id":"https://openalex.org/C79974875","wikidata":"https://www.wikidata.org/wiki/Q483639","display_name":"Cloud computing","level":2,"score":0.2500999867916107}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3712285.3759857","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3712285.3759857","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the International Conference for High Performance Computing, Networking, Storage and Analysis","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":21,"referenced_works":["https://openalex.org/W2097117768","https://openalex.org/W2112796928","https://openalex.org/W2193145675","https://openalex.org/W2194775991","https://openalex.org/W2513730816","https://openalex.org/W2944379335","https://openalex.org/W2963367478","https://openalex.org/W2963446712","https://openalex.org/W2963918968","https://openalex.org/W2982157693","https://openalex.org/W3017091196","https://openalex.org/W3097411828","https://openalex.org/W3214476430","https://openalex.org/W4231332361","https://openalex.org/W4289828024","https://openalex.org/W4290991121","https://openalex.org/W4304192541","https://openalex.org/W4401408761","https://openalex.org/W4402805367","https://openalex.org/W4405418476","https://openalex.org/W4405756372"],"related_works":[],"abstract_inverted_index":{"Many":[0],"inference":[1,140],"systems":[2],"leverage":[3],"spatial":[4,136],"multiplexing":[5,137],"technologies,":[6],"such":[7],"as":[8],"Multi-Process":[9],"Service":[10],"(MPS)":[11],"and":[12,34,58,71,86,93,109],"Multi-Instance":[13],"GPU":[14,60],"(MIG),":[15],"to":[16,55,102,119,144],"serve":[17],"deep":[18],"learning":[19],"models":[20,108],"concurrently":[21],"on":[22,127],"a":[23,47,73,112,128],"single":[24],"GPU.":[25],"However,":[26],"existing":[27],"solutions":[28],"suffer":[29],"from":[30],"interference":[31,57,99],"under":[32],"MPS":[33,51,68,87,98,123],"rigid":[35],"partition":[36],"sizes":[37],"in":[38,66],"MIG.":[39],"To":[40],"address":[41],"these":[42],"limitations,":[43],"we":[44],"propose":[45],"BOER,":[46],"system":[48],"that":[49,77,132],"combines":[50],"atop":[52],"MIG":[53,70,84,94],"partitions":[54],"reduce":[56],"enhance":[59],"utilization.":[61],"BOER":[62,105,133],"identifies":[63],"key":[64],"challenges":[65],"integrating":[67],"with":[69,115],"introduces":[72],"hierarchical":[74],"scheduling":[75],"framework":[76],"jointly":[78],"determines":[79],"model":[80],"colocation,":[81],"workload":[82],"distribution,":[83],"partitioning,":[85],"configurations,":[88],"while":[89,146],"minimizing":[90],"resource":[91],"fragmentation":[92],"reconfiguration":[95],"overhead.":[96],"Since":[97],"is":[100],"difficult":[101],"predict":[103],"accurately,":[104],"avoids":[106],"performance":[107],"instead":[110],"employs":[111],"Bayesian":[113],"optimization":[114],"tailored":[116],"acceleration":[117],"strategies":[118],"efficiently":[120],"explore":[121],"the":[122],"configuration":[124],"space.":[125],"Evaluation":[126],"real":[129],"testbed":[130],"demonstrates":[131],"outperforms":[134],"state-of-the-art":[135],"solutions,":[138],"improving":[139],"throughput":[141],"by":[142],"up":[143],"46.04%\u201377.19%":[145],"preserving":[147],"Quality-of-Service":[148],"requirements.":[149]},"counts_by_year":[{"year":2025,"cited_by_count":1}],"updated_date":"2026-03-07T16:01:11.037858","created_date":"2025-11-12T00:00:00"}
