{"id":"https://openalex.org/W4403182945","doi":"https://doi.org/10.1109/tcc.2024.3476210","title":"D-STACK: High Throughput DNN Inference by Effective Multiplexing and Spatio-Temporal Scheduling of GPUs","display_name":"D-STACK: High Throughput DNN Inference by Effective Multiplexing and Spatio-Temporal Scheduling of GPUs","publication_year":2024,"publication_date":"2024-10-01","ids":{"openalex":"https://openalex.org/W4403182945","doi":"https://doi.org/10.1109/tcc.2024.3476210"},"language":"en","primary_location":{"id":"doi:10.1109/tcc.2024.3476210","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tcc.2024.3476210","pdf_url":null,"source":{"id":"https://openalex.org/S2492498579","display_name":"IEEE Transactions on Cloud Computing","issn_l":"2168-7161","issn":["2168-7161","2372-0018"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Cloud Computing","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5077061884","display_name":"Aditya Dhakal","orcid":"https://orcid.org/0000-0002-8297-8525"},"institutions":[{"id":"https://openalex.org/I103635307","display_name":"University of California, Riverside","ror":"https://ror.org/03nawhv43","country_code":"US","type":"education","lineage":["https://openalex.org/I103635307"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Aditya Dhakal","raw_affiliation_strings":["University of California, Riverside, Riverside, CA, USA","University of California, Riverside, USA"],"raw_orcid":"https://orcid.org/0000-0002-8297-8525","affiliations":[{"raw_affiliation_string":"University of California, Riverside, Riverside, CA, USA","institution_ids":["https://openalex.org/I103635307"]},{"raw_affiliation_string":"University of California, Riverside, USA","institution_ids":["https://openalex.org/I103635307"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5081037214","display_name":"Sameer G. Kulkarni","orcid":"https://orcid.org/0000-0003-4727-6875"},"institutions":[{"id":"https://openalex.org/I27674431","display_name":"Indian Institute of Technology Gandhinagar","ror":"https://ror.org/0036p5w23","country_code":"IN","type":"education","lineage":["https://openalex.org/I27674431"]}],"countries":["IN"],"is_corresponding":false,"raw_author_name":"Sameer G. Kulkarni","raw_affiliation_strings":["IIT Gandhinagar, Gujarat, India","IIT Gandhinagar, Hekla, India"],"raw_orcid":"https://orcid.org/0000-0003-4727-6875","affiliations":[{"raw_affiliation_string":"IIT Gandhinagar, Gujarat, India","institution_ids":["https://openalex.org/I27674431"]},{"raw_affiliation_string":"IIT Gandhinagar, Hekla, India","institution_ids":["https://openalex.org/I27674431"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5053712998","display_name":"K. K. Ramakrishnan","orcid":"https://orcid.org/0000-0003-1849-5155"},"institutions":[{"id":"https://openalex.org/I103635307","display_name":"University of California, Riverside","ror":"https://ror.org/03nawhv43","country_code":"US","type":"education","lineage":["https://openalex.org/I103635307"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"K. K. Ramakrishnan","raw_affiliation_strings":["University of California, Riverside, Riverside, CA, USA","University of California, Riverside, USA"],"raw_orcid":"https://orcid.org/0000-0003-1849-5155","affiliations":[{"raw_affiliation_string":"University of California, Riverside, Riverside, CA, USA","institution_ids":["https://openalex.org/I103635307"]},{"raw_affiliation_string":"University of California, Riverside, USA","institution_ids":["https://openalex.org/I103635307"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":3,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.4299,"has_fulltext":false,"cited_by_count":2,"citation_normalized_percentile":{"value":0.64453533,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":91,"max":98},"biblio":{"volume":"12","issue":"4","first_page":"1344","last_page":"1358"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12702","display_name":"Brain Tumor Detection and Classification","score":0.9878000020980835,"subfield":{"id":"https://openalex.org/subfields/2808","display_name":"Neurology"},"field":{"id":"https://openalex.org/fields/28","display_name":"Neuroscience"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},"topics":[{"id":"https://openalex.org/T12702","display_name":"Brain Tumor Detection and Classification","score":0.9878000020980835,"subfield":{"id":"https://openalex.org/subfields/2808","display_name":"Neurology"},"field":{"id":"https://openalex.org/fields/28","display_name":"Neuroscience"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.9610999822616577,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10862","display_name":"AI in cancer detection","score":0.9136999845504761,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8352196216583252},{"id":"https://openalex.org/keywords/inference","display_name":"Inference","score":0.7016545534133911},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.6591694951057434},{"id":"https://openalex.org/keywords/stack","display_name":"Stack (abstract data type)","score":0.6305789351463318},{"id":"https://openalex.org/keywords/scheduling","display_name":"Scheduling (production processes)","score":0.6011381149291992},{"id":"https://openalex.org/keywords/multiplexing","display_name":"Multiplexing","score":0.5561733841896057},{"id":"https://openalex.org/keywords/throughput","display_name":"Throughput","score":0.47347939014434814},{"id":"https://openalex.org/keywords/computer-architecture","display_name":"Computer architecture","score":0.46345704793930054},{"id":"https://openalex.org/keywords/distributed-computing","display_name":"Distributed computing","score":0.36826658248901367},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.19134923815727234},{"id":"https://openalex.org/keywords/operating-system","display_name":"Operating system","score":0.12704682350158691},{"id":"https://openalex.org/keywords/wireless","display_name":"Wireless","score":0.09420701861381531}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8352196216583252},{"id":"https://openalex.org/C2776214188","wikidata":"https://www.wikidata.org/wiki/Q408386","display_name":"Inference","level":2,"score":0.7016545534133911},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.6591694951057434},{"id":"https://openalex.org/C9395851","wikidata":"https://www.wikidata.org/wiki/Q177929","display_name":"Stack (abstract data type)","level":2,"score":0.6305789351463318},{"id":"https://openalex.org/C206729178","wikidata":"https://www.wikidata.org/wiki/Q2271896","display_name":"Scheduling (production processes)","level":2,"score":0.6011381149291992},{"id":"https://openalex.org/C19275194","wikidata":"https://www.wikidata.org/wiki/Q222903","display_name":"Multiplexing","level":2,"score":0.5561733841896057},{"id":"https://openalex.org/C157764524","wikidata":"https://www.wikidata.org/wiki/Q1383412","display_name":"Throughput","level":3,"score":0.47347939014434814},{"id":"https://openalex.org/C118524514","wikidata":"https://www.wikidata.org/wiki/Q173212","display_name":"Computer architecture","level":1,"score":0.46345704793930054},{"id":"https://openalex.org/C120314980","wikidata":"https://www.wikidata.org/wiki/Q180634","display_name":"Distributed computing","level":1,"score":0.36826658248901367},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.19134923815727234},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.12704682350158691},{"id":"https://openalex.org/C555944384","wikidata":"https://www.wikidata.org/wiki/Q249","display_name":"Wireless","level":2,"score":0.09420701861381531},{"id":"https://openalex.org/C21547014","wikidata":"https://www.wikidata.org/wiki/Q1423657","display_name":"Operations management","level":1,"score":0.0},{"id":"https://openalex.org/C76155785","wikidata":"https://www.wikidata.org/wiki/Q418","display_name":"Telecommunications","level":1,"score":0.0},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/tcc.2024.3476210","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tcc.2024.3476210","pdf_url":null,"source":{"id":"https://openalex.org/S2492498579","display_name":"IEEE Transactions on Cloud Computing","issn_l":"2168-7161","issn":["2168-7161","2372-0018"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Cloud Computing","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/7","score":0.4699999988079071,"display_name":"Affordable and clean energy"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":60,"referenced_works":["https://openalex.org/W1979566015","https://openalex.org/W1979866793","https://openalex.org/W2108598243","https://openalex.org/W2147800946","https://openalex.org/W2150871235","https://openalex.org/W2163687928","https://openalex.org/W2194775991","https://openalex.org/W2409247164","https://openalex.org/W2465005408","https://openalex.org/W2480662436","https://openalex.org/W2531915888","https://openalex.org/W2549139847","https://openalex.org/W2604514113","https://openalex.org/W2606722458","https://openalex.org/W2613168994","https://openalex.org/W2791673912","https://openalex.org/W2798291715","https://openalex.org/W2808758461","https://openalex.org/W2823766194","https://openalex.org/W2885579974","https://openalex.org/W2891285910","https://openalex.org/W2896457183","https://openalex.org/W2934426651","https://openalex.org/W2952562115","https://openalex.org/W2969388332","https://openalex.org/W2982157693","https://openalex.org/W2991040477","https://openalex.org/W3037377931","https://openalex.org/W3080985349","https://openalex.org/W3097411828","https://openalex.org/W3179726888","https://openalex.org/W3196353812","https://openalex.org/W4231332361","https://openalex.org/W4245867598","https://openalex.org/W4246193833","https://openalex.org/W4286511746","https://openalex.org/W4317935380","https://openalex.org/W6607252725","https://openalex.org/W6637373629","https://openalex.org/W6676984168","https://openalex.org/W6684191040","https://openalex.org/W6727690538","https://openalex.org/W6730956707","https://openalex.org/W6737664043","https://openalex.org/W6750448596","https://openalex.org/W6750569764","https://openalex.org/W6751349269","https://openalex.org/W6753846162","https://openalex.org/W6756009870","https://openalex.org/W6757364461","https://openalex.org/W6758283263","https://openalex.org/W6758946347","https://openalex.org/W6759814162","https://openalex.org/W6766978945","https://openalex.org/W6776877028","https://openalex.org/W6778371292","https://openalex.org/W6779103662","https://openalex.org/W6779965347","https://openalex.org/W6780284225","https://openalex.org/W6787098909"],"related_works":["https://openalex.org/W2380576232","https://openalex.org/W2937054111","https://openalex.org/W2066223521","https://openalex.org/W2013178899","https://openalex.org/W373327546","https://openalex.org/W2321534397","https://openalex.org/W2058958858","https://openalex.org/W2077601556","https://openalex.org/W2001568286","https://openalex.org/W2001823884"],"abstract_inverted_index":{"Hardware":[0],"accelerators":[1,30],"such":[2],"as":[3],"GPUs":[4],"are":[5,89],"required":[6],"for":[7,66,102,136],"real-time,":[8],"low":[9],"latency":[10],"inference":[11,18,61,141,194],"with":[12,153,172],"Deep":[13],"Neural":[14],"Networks":[15],"(DNN).":[16],"Providing":[17],"services":[19],"in":[20,31,107,184,193],"the":[21,32,39,43,51,64,69,108,119],"cloud":[22,33],"can":[23,123],"be":[24],"resource":[25],"intensive,":[26],"and":[27,59,77,82,97,113,125,157,187],"effectively":[28],"utilizing":[29],"is":[34],"important.":[35],"Spatial":[36],"multiplexing":[37,156,173],"of":[38,72],"GPU,":[40],"while":[41,146],"limiting":[42],"GPU":[44,57,65,109,155,185],"resources":[45],"(GPU%)":[46],"to":[47,50,55,74,105,130,180,189],"each":[48,67,121,137],"DNN":[49,68,122,167,176],"right":[52],"amount,":[53],"leads":[54],"higher":[56,60],"utilization":[58,186],"throughput.":[62,195],"Right-sizing":[63],"optimal":[70],"batching":[71],"requests":[73],"balance":[75],"throughput":[76,84,145],"service":[78],"level":[79],"objectives":[80],"(SLOs),":[81],"maximizing":[83],"by":[85],"appropriately":[86],"scheduling":[87,158],"DNNs":[88,104],"still":[90],"significant":[91],"challenges.This":[92],"article":[93],"introduces":[94],"a":[95,115,126],"dynamic":[96],"fair":[98],"spatio-temporal":[99],"scheduler":[100],"(D-STACK)":[101],"multiple":[103],"run":[106],"concurrently.":[110],"We":[111,150],"develop":[112],"validate":[114],"model":[116],"that":[117],"estimates":[118],"parallelism":[120],"utilize":[124],"lightweight":[127],"optimization":[128],"formulation":[129],"find":[131],"an":[132],"efficient":[133],"batch":[134],"size":[135],"DNN.":[138],"Our":[139,169],"holistic":[140],"framework":[142],"provides":[143],"high":[144],"meeting":[147],"application":[148],"SLOs.":[149],"compare":[151],"D-STACK":[152],"other":[154],"methods":[159],"(e.g.,":[160],"NVIDIA":[161],"Triton,":[162],"Clipper,":[163],"Nexus),":[164],"using":[165],"popular":[166,175],"models.":[168],"controlled":[170],"experiments":[171],"several":[174],"models":[177],"achieve":[178],"up":[179,188],"<inline-formula><tex-math":[181,190],"notation=\"LaTeX\">$1.6\\times$</tex-math></inline-formula>":[182],"improvement":[183,192],"notation=\"LaTeX\">$4\\times$</tex-math></inline-formula>":[191]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":1}],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2025-10-10T00:00:00"}
