{"id":"https://openalex.org/W4390677227","doi":"https://doi.org/10.1145/3631311.3632401","title":"Towards Optimal Preemptive GPU Time-Sharing for Edge Model Serving","display_name":"Towards Optimal Preemptive GPU Time-Sharing for Edge Model Serving","publication_year":2023,"publication_date":"2023-12-11","ids":{"openalex":"https://openalex.org/W4390677227","doi":"https://doi.org/10.1145/3631311.3632401"},"language":"en","primary_location":{"id":"doi:10.1145/3631311.3632401","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3631311.3632401","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3631311.3632401","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 9th International Workshop on Container Technologies and Container Clouds","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://dl.acm.org/doi/pdf/10.1145/3631311.3632401","any_repository_has_fulltext":null},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5060576594","display_name":"Zhengxu Xia","orcid":"https://orcid.org/0000-0001-8626-192X"},"institutions":[{"id":"https://openalex.org/I40347166","display_name":"University of Chicago","ror":"https://ror.org/024mw5h28","country_code":"US","type":"education","lineage":["https://openalex.org/I40347166"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Zhengxu Xia","raw_affiliation_strings":["University of Chicago, USA"],"affiliations":[{"raw_affiliation_string":"University of Chicago, USA","institution_ids":["https://openalex.org/I40347166"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101967178","display_name":"Yitian Hao","orcid":"https://orcid.org/0009-0002-4330-1228"},"institutions":[{"id":"https://openalex.org/I40347166","display_name":"University of Chicago","ror":"https://ror.org/024mw5h28","country_code":"US","type":"education","lineage":["https://openalex.org/I40347166"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Yitian Hao","raw_affiliation_strings":["University of Chicago, USA"],"affiliations":[{"raw_affiliation_string":"University of Chicago, USA","institution_ids":["https://openalex.org/I40347166"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5055829585","display_name":"Jun Duan","orcid":"https://orcid.org/0000-0002-5043-2421"},"institutions":[{"id":"https://openalex.org/I4210114115","display_name":"IBM Research - Thomas J. Watson Research Center","ror":"https://ror.org/0265w5591","country_code":"US","type":"facility","lineage":["https://openalex.org/I1341412227","https://openalex.org/I4210114115"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Jun Duan","raw_affiliation_strings":["IBM Thomas J. Watson Research Center, USA"],"affiliations":[{"raw_affiliation_string":"IBM Thomas J. Watson Research Center, USA","institution_ids":["https://openalex.org/I4210114115"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100337643","display_name":"Chen Wang","orcid":"https://orcid.org/0000-0003-0204-2362"},"institutions":[{"id":"https://openalex.org/I4210114115","display_name":"IBM Research - Thomas J. Watson Research Center","ror":"https://ror.org/0265w5591","country_code":"US","type":"facility","lineage":["https://openalex.org/I1341412227","https://openalex.org/I4210114115"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Chen Wang","raw_affiliation_strings":["IBM Thomas J. Watson Research Center, USA"],"affiliations":[{"raw_affiliation_string":"IBM Thomas J. Watson Research Center, USA","institution_ids":["https://openalex.org/I4210114115"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5103258769","display_name":"Junchen Jiang","orcid":"https://orcid.org/0000-0002-6877-1683"},"institutions":[{"id":"https://openalex.org/I40347166","display_name":"University of Chicago","ror":"https://ror.org/024mw5h28","country_code":"US","type":"education","lineage":["https://openalex.org/I40347166"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Junchen Jiang","raw_affiliation_strings":["University of Chicago, USA"],"affiliations":[{"raw_affiliation_string":"University of Chicago, USA","institution_ids":["https://openalex.org/I40347166"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5060576594"],"corresponding_institution_ids":["https://openalex.org/I40347166"],"apc_list":null,"apc_paid":null,"fwci":1.5125,"has_fulltext":true,"cited_by_count":5,"citation_normalized_percentile":{"value":0.82844867,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":97,"max":98},"biblio":{"volume":null,"issue":null,"first_page":"13","last_page":"18"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.9991999864578247,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.9991999864578247,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10101","display_name":"Cloud Computing and Resource Management","score":0.9983999729156494,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11181","display_name":"Advanced Data Storage Technologies","score":0.9976999759674072,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/preemption","display_name":"Preemption","score":0.8872497081756592},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8692692518234253},{"id":"https://openalex.org/keywords/workload","display_name":"Workload","score":0.6818643808364868},{"id":"https://openalex.org/keywords/synchronization","display_name":"Synchronization (alternating current)","score":0.6265788078308105},{"id":"https://openalex.org/keywords/inference","display_name":"Inference","score":0.6082836985588074},{"id":"https://openalex.org/keywords/kernel","display_name":"Kernel (algebra)","score":0.5011582374572754},{"id":"https://openalex.org/keywords/enhanced-data-rates-for-gsm-evolution","display_name":"Enhanced Data Rates for GSM Evolution","score":0.4871349036693573},{"id":"https://openalex.org/keywords/real-time-computing","display_name":"Real-time computing","score":0.44027701020240784},{"id":"https://openalex.org/keywords/distributed-computing","display_name":"Distributed computing","score":0.43136849999427795},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.3850160241127014},{"id":"https://openalex.org/keywords/computer-network","display_name":"Computer network","score":0.29335880279541016},{"id":"https://openalex.org/keywords/operating-system","display_name":"Operating system","score":0.18639037013053894},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.08204445242881775}],"concepts":[{"id":"https://openalex.org/C206952183","wikidata":"https://www.wikidata.org/wiki/Q1193100","display_name":"Preemption","level":2,"score":0.8872497081756592},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8692692518234253},{"id":"https://openalex.org/C2778476105","wikidata":"https://www.wikidata.org/wiki/Q628539","display_name":"Workload","level":2,"score":0.6818643808364868},{"id":"https://openalex.org/C2778562939","wikidata":"https://www.wikidata.org/wiki/Q1298791","display_name":"Synchronization (alternating current)","level":3,"score":0.6265788078308105},{"id":"https://openalex.org/C2776214188","wikidata":"https://www.wikidata.org/wiki/Q408386","display_name":"Inference","level":2,"score":0.6082836985588074},{"id":"https://openalex.org/C74193536","wikidata":"https://www.wikidata.org/wiki/Q574844","display_name":"Kernel (algebra)","level":2,"score":0.5011582374572754},{"id":"https://openalex.org/C162307627","wikidata":"https://www.wikidata.org/wiki/Q204833","display_name":"Enhanced Data Rates for GSM Evolution","level":2,"score":0.4871349036693573},{"id":"https://openalex.org/C79403827","wikidata":"https://www.wikidata.org/wiki/Q3988","display_name":"Real-time computing","level":1,"score":0.44027701020240784},{"id":"https://openalex.org/C120314980","wikidata":"https://www.wikidata.org/wiki/Q180634","display_name":"Distributed computing","level":1,"score":0.43136849999427795},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.3850160241127014},{"id":"https://openalex.org/C31258907","wikidata":"https://www.wikidata.org/wiki/Q1301371","display_name":"Computer network","level":1,"score":0.29335880279541016},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.18639037013053894},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.08204445242881775},{"id":"https://openalex.org/C114614502","wikidata":"https://www.wikidata.org/wiki/Q76592","display_name":"Combinatorics","level":1,"score":0.0},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.0},{"id":"https://openalex.org/C127162648","wikidata":"https://www.wikidata.org/wiki/Q16858953","display_name":"Channel (broadcasting)","level":2,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3631311.3632401","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3631311.3632401","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3631311.3632401","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 9th International Workshop on Container Technologies and Container Clouds","raw_type":"proceedings-article"}],"best_oa_location":{"id":"doi:10.1145/3631311.3632401","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3631311.3632401","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3631311.3632401","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 9th International Workshop on Container Technologies and Container Clouds","raw_type":"proceedings-article"},"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/8","display_name":"Decent work and economic growth","score":0.41999998688697815}],"awards":[],"funders":[],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4390677227.pdf","grobid_xml":"https://content.openalex.org/works/W4390677227.grobid-xml"},"referenced_works_count":17,"referenced_works":["https://openalex.org/W95608104","https://openalex.org/W2194775991","https://openalex.org/W2500476212","https://openalex.org/W2565600385","https://openalex.org/W2581065617","https://openalex.org/W2734941459","https://openalex.org/W2963351448","https://openalex.org/W3033527233","https://openalex.org/W3037377931","https://openalex.org/W3096484587","https://openalex.org/W3097411828","https://openalex.org/W3127819678","https://openalex.org/W3208777667","https://openalex.org/W4312199008","https://openalex.org/W4317935380","https://openalex.org/W6760979735","https://openalex.org/W6798686915"],"related_works":["https://openalex.org/W3191866865","https://openalex.org/W840708677","https://openalex.org/W2361541374","https://openalex.org/W2000785801","https://openalex.org/W1439300391","https://openalex.org/W1970491116","https://openalex.org/W2888133284","https://openalex.org/W986318368","https://openalex.org/W601696587","https://openalex.org/W2881508932"],"abstract_inverted_index":{"With":[0],"GPUs":[1],"increasingly":[2],"shared":[3,178],"by":[4,87,166,271],"DNN":[5,38,141,215],"models":[6],"at":[7],"the":[8,19,41,57,65,69,73,84,107,119,126,140,189,229,235,259],"edge,":[9],"a":[10,25,60,134,171,174,204,211,223,244],"crucial":[11],"tradeoff":[12],"arises":[13],"between":[14],"high":[15,45],"GPU":[16,42,46,66,151,212,261],"utilization":[17],"and":[18,98,115,128,150,192,238,241,268,273,293],"ability":[20],"of":[21,121,136,147,155,207,220,231,246,288],"fast":[22],"preemption":[23,62,85,99,129,239],"when":[24],"high-priority":[26,74],"request":[27,75],"arrives.":[28],"To":[29,197],"reduce":[30],"inference":[31,34,96,127,236,252,266],"delay,":[32],"an":[33],"job":[35],"can":[36,76,82],"\"burst\"":[37],"kernels":[39,53,71],"into":[40],"to":[43,93,181,209,258],"maintain":[44],"utilization,":[47],"but":[48,153],"this":[49],"also":[50],"creates":[51],"outstanding":[52],"internally":[54],"queued":[55,70],"in":[56,161,177],"GPU,":[58],"causing":[59],"substantial":[61],"delay":[63,86,97,100,225,237,240,253,267],"as":[64],"must":[67],"clear":[68],"before":[72],"preempt.":[77],"Unfortunately,":[78],"while":[79],"existing":[80],"systems":[81,180],"alleviate":[83],"adding":[88,289],"synchronization":[89,108,123,185,232,248,290],"points,":[90],"they":[91,103],"fail":[92],"keep":[94,279],"both":[95],"low,":[101],"because":[102],"cannot":[104],"optimally":[105],"insert":[106,183],"points":[109,124,186,233,249,291],"for":[110,173,213,297],"various":[111],"workloads.":[112],"Our":[113],"measurements":[114],"analysis":[116],"show":[117],"that":[118,227,250],"impact":[120,230],"inserting":[122,247],"on":[125,188,234],"delays":[130],"varies":[131],"greatly":[132],"with":[133],"range":[135],"workload":[137,157,190],"characteristics,":[138],"including":[139],"architecture,":[142],"input":[143],"size,":[144],"batch":[145],"size":[146],"different":[148],"requests,":[149],"type,":[152],"most":[154],"these":[156,167],"factors":[158],"are":[159],"overlooked":[160],"current":[162],"shared-edge":[163],"systems.":[164],"Inspired":[165],"findings,":[168],"we":[169,201,277],"make":[170],"case":[172],"new":[175],"module":[176],"edge":[179],"dynamically":[182,242,292],"kernel":[184],"depending":[187],"characteristics":[191],"service-level":[193],"objective":[194],"(SLO)":[195],"deadlines.":[196],"examine":[198],"its":[199],"potential,":[200],"present":[202],"Deft,":[203],"concrete":[205],"prototype":[206],"concept":[208],"share":[210],"multiple":[214],"containers.":[216],"The":[217],"key":[218,295],"component":[219],"Deft":[221,264],"is":[222],"profiling-based":[224],"predictor":[226],"estimates":[228],"selects":[243],"frequency":[245],"minimizes":[251],"or":[254],"SLO":[255,269],"violations.":[256],"Compared":[257],"state-of-the-art":[260],"sharing":[262],"schemes,":[263],"reduces":[265],"violations":[270],"28%":[272],"14%":[274],"respectively.":[275],"While":[276],"intentionally":[278],"Deft's":[280],"design":[281],"simple,":[282],"it":[283],"already":[284],"shows":[285],"early":[286],"promise":[287],"highlights":[294],"questions":[296],"future":[298],"research.":[299]},"counts_by_year":[{"year":2025,"cited_by_count":5}],"updated_date":"2026-03-27T05:58:40.876381","created_date":"2025-10-10T00:00:00"}
