{"id":"https://openalex.org/W3217445637","doi":"https://doi.org/10.1109/iccad51958.2021.9643501","title":"Automated Runtime-Aware Scheduling for Multi-Tenant DNN Inference on GPU","display_name":"Automated Runtime-Aware Scheduling for Multi-Tenant DNN Inference on GPU","publication_year":2021,"publication_date":"2021-11-01","ids":{"openalex":"https://openalex.org/W3217445637","doi":"https://doi.org/10.1109/iccad51958.2021.9643501","mag":"3217445637"},"language":"en","primary_location":{"id":"doi:10.1109/iccad51958.2021.9643501","is_oa":false,"landing_page_url":"https://doi.org/10.1109/iccad51958.2021.9643501","pdf_url":null,"source":{"id":"https://openalex.org/S4363608354","display_name":"2021 IEEE/ACM International Conference On Computer Aided Design (ICCAD)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2021 IEEE/ACM International Conference On Computer Aided Design (ICCAD)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref","datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"http://hdl.handle.net/11603/23954","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5103085687","display_name":"Fuxun Yu","orcid":"https://orcid.org/0000-0002-4880-6658"},"institutions":[{"id":"https://openalex.org/I162714631","display_name":"George Mason University","ror":"https://ror.org/02jqj7156","country_code":"US","type":"education","lineage":["https://openalex.org/I162714631"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Fuxun Yu","raw_affiliation_strings":["George Mason University, Fairfax, VA, USA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"George Mason University, Fairfax, VA, USA","institution_ids":["https://openalex.org/I162714631"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5024715377","display_name":"Shawn Bray","orcid":null},"institutions":[{"id":"https://openalex.org/I126744593","display_name":"University of Maryland, Baltimore","ror":"https://ror.org/04rq5mt64","country_code":"US","type":"education","lineage":["https://openalex.org/I126744593"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Shawn Bray","raw_affiliation_strings":["University of Maryland, Baltimore, MD, USA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"University of Maryland, Baltimore, MD, USA","institution_ids":["https://openalex.org/I126744593"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100401427","display_name":"Di Wang","orcid":"https://orcid.org/0000-0002-3911-8159"},"institutions":[{"id":"https://openalex.org/I1290206253","display_name":"Microsoft (United States)","ror":"https://ror.org/00d0nc645","country_code":"US","type":"company","lineage":["https://openalex.org/I1290206253"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Di Wang","raw_affiliation_strings":["Microsoft,Redmond,WA,USA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Microsoft,Redmond,WA,USA","institution_ids":["https://openalex.org/I1290206253"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5070325443","display_name":"Longfei Shangguan","orcid":"https://orcid.org/0000-0002-1153-7087"},"institutions":[{"id":"https://openalex.org/I1290206253","display_name":"Microsoft (United States)","ror":"https://ror.org/00d0nc645","country_code":"US","type":"company","lineage":["https://openalex.org/I1290206253"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Longfei Shangguan","raw_affiliation_strings":["Microsoft,Redmond,WA,USA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Microsoft,Redmond,WA,USA","institution_ids":["https://openalex.org/I1290206253"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5087859795","display_name":"Xulong Tang","orcid":"https://orcid.org/0000-0002-3385-2053"},"institutions":[{"id":"https://openalex.org/I170201317","display_name":"University of Pittsburgh","ror":"https://ror.org/01an3r305","country_code":"US","type":"education","lineage":["https://openalex.org/I170201317"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Xulong Tang","raw_affiliation_strings":["University of Pittsburgh, Pittsburgh, PA, USA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"University of Pittsburgh, Pittsburgh, PA, USA","institution_ids":["https://openalex.org/I170201317"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100767202","display_name":"Chenchen Liu","orcid":"https://orcid.org/0000-0001-7749-0640"},"institutions":[{"id":"https://openalex.org/I126744593","display_name":"University of Maryland, Baltimore","ror":"https://ror.org/04rq5mt64","country_code":"US","type":"education","lineage":["https://openalex.org/I126744593"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Chenchen Liu","raw_affiliation_strings":["University of Maryland, Baltimore, MD, USA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"University of Maryland, Baltimore, MD, USA","institution_ids":["https://openalex.org/I126744593"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5100441957","display_name":"Xiang Chen","orcid":"https://orcid.org/0000-0003-2790-976X"},"institutions":[{"id":"https://openalex.org/I162714631","display_name":"George Mason University","ror":"https://ror.org/02jqj7156","country_code":"US","type":"education","lineage":["https://openalex.org/I162714631"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Xiang Chen","raw_affiliation_strings":["George Mason University, Fairfax, VA, USA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"George Mason University, Fairfax, VA, USA","institution_ids":["https://openalex.org/I162714631"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":7,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":2.1005,"has_fulltext":false,"cited_by_count":44,"citation_normalized_percentile":{"value":0.91805158,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":89,"max":100},"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"9"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12535","display_name":"Machine Learning and Data Classification","score":0.9958000183105469,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11689","display_name":"Adversarial Robustness in Machine Learning","score":0.994700014591217,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8563896417617798},{"id":"https://openalex.org/keywords/inference","display_name":"Inference","score":0.7406025528907776},{"id":"https://openalex.org/keywords/scheduling","display_name":"Scheduling (production processes)","score":0.6865862011909485},{"id":"https://openalex.org/keywords/distributed-computing","display_name":"Distributed computing","score":0.4995722770690918},{"id":"https://openalex.org/keywords/concurrency","display_name":"Concurrency","score":0.49456116557121277},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.37573400139808655},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.3252147436141968}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8563896417617798},{"id":"https://openalex.org/C2776214188","wikidata":"https://www.wikidata.org/wiki/Q408386","display_name":"Inference","level":2,"score":0.7406025528907776},{"id":"https://openalex.org/C206729178","wikidata":"https://www.wikidata.org/wiki/Q2271896","display_name":"Scheduling (production processes)","level":2,"score":0.6865862011909485},{"id":"https://openalex.org/C120314980","wikidata":"https://www.wikidata.org/wiki/Q180634","display_name":"Distributed computing","level":1,"score":0.4995722770690918},{"id":"https://openalex.org/C193702766","wikidata":"https://www.wikidata.org/wiki/Q1414548","display_name":"Concurrency","level":2,"score":0.49456116557121277},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.37573400139808655},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.3252147436141968},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0},{"id":"https://openalex.org/C21547014","wikidata":"https://www.wikidata.org/wiki/Q1423657","display_name":"Operations management","level":1,"score":0.0}],"mesh":[],"locations_count":3,"locations":[{"id":"doi:10.1109/iccad51958.2021.9643501","is_oa":false,"landing_page_url":"https://doi.org/10.1109/iccad51958.2021.9643501","pdf_url":null,"source":{"id":"https://openalex.org/S4363608354","display_name":"2021 IEEE/ACM International Conference On Computer Aided Design (ICCAD)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2021 IEEE/ACM International Conference On Computer Aided Design (ICCAD)","raw_type":"proceedings-article"},{"id":"pmh:oai:mdsoar.org:11603/23954","is_oa":true,"landing_page_url":"http://hdl.handle.net/11603/23954","pdf_url":null,"source":{"id":"https://openalex.org/S4306402556","display_name":"Maryland Shared Open Access Repository (USMAI Consortium)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Text"},{"id":"doi:10.13016/m2c1cv-iids","is_oa":true,"landing_page_url":"https://doi.org/10.13016/m2c1cv-iids","pdf_url":null,"source":{"id":"https://openalex.org/S4306402644","display_name":"Digital Repository at the University of Maryland (University of Maryland College Park)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I66946132","host_organization_name":"University of Maryland, College Park","host_organization_lineage":["https://openalex.org/I66946132"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"pmh:oai:mdsoar.org:11603/23954","is_oa":true,"landing_page_url":"http://hdl.handle.net/11603/23954","pdf_url":null,"source":{"id":"https://openalex.org/S4306402556","display_name":"Maryland Shared Open Access Repository (USMAI Consortium)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Text"},"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G8675370699","display_name":null,"funder_award_id":"CNS-2003211,CNS-1939380","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"}],"funders":[{"id":"https://openalex.org/F4320306076","display_name":"National Science Foundation","ror":"https://ror.org/021nxhr62"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":23,"referenced_works":["https://openalex.org/W639708223","https://openalex.org/W1861492603","https://openalex.org/W2108598243","https://openalex.org/W2143612262","https://openalex.org/W2146575011","https://openalex.org/W2794258351","https://openalex.org/W2804032941","https://openalex.org/W2886934227","https://openalex.org/W2921495890","https://openalex.org/W2945357343","https://openalex.org/W2950045474","https://openalex.org/W2962861284","https://openalex.org/W2967733054","https://openalex.org/W2981758446","https://openalex.org/W3035564946","https://openalex.org/W3097511903","https://openalex.org/W3101965820","https://openalex.org/W3120853372","https://openalex.org/W3129083170","https://openalex.org/W3136585513","https://openalex.org/W6639102338","https://openalex.org/W6751349269","https://openalex.org/W6754123467"],"related_works":["https://openalex.org/W1888970550","https://openalex.org/W3085024073","https://openalex.org/W2055243143","https://openalex.org/W2477601761","https://openalex.org/W59382029","https://openalex.org/W2997101070","https://openalex.org/W2135424310","https://openalex.org/W2473726211","https://openalex.org/W1721911782","https://openalex.org/W2172295952"],"abstract_inverted_index":{"With":[0],"the":[1,38,60,98,104,131,138],"fast":[2],"development":[3],"of":[4],"deep":[5],"neural":[6],"networks":[7],"(DNNs),":[8],"many":[9],"real-world":[10],"applications":[11],"are":[12],"adopting":[13],"multiple":[14],"models":[15,27],"to":[16,114,154],"conduct":[17],"compound":[18],"tasks,":[19],"such":[20,65],"as":[21,53,55],"co-running":[22],"classification,":[23],"detection,":[24],"and":[25,41,103,119,135,162],"segmentation":[26],"on":[28,86],"autonomous":[29],"vehicles.":[30],"Such":[31],"multi-tenant":[32,66,83],"DNN":[33,84,91,121,156],"inference":[34,67,85,133],"cases":[35],"greatly":[36],"exacerbate":[37],"computational":[39],"complexity":[40],"call":[42],"for":[43,46,64,81],"comprehensive":[44],"collaboration":[45],"graph-level":[47],"operator":[48],"scheduling,":[49],"runtime-level":[50],"resource":[51,128],"awareness,":[52],"well":[54],"hardware":[56],"scheduler":[57],"support.":[58],"However,":[59],"current":[61],"scheduling":[62,79,100,165],"support":[63],"is":[68],"still":[69],"relatively":[70],"backward.":[71],"In":[72],"this":[73],"work,":[74],"we":[75,144],"propose":[76],"a":[77,125],"resource-aware":[78],"framework":[80],"efficient":[82],"GPU,":[87],"which":[88],"automatically":[89],"coordinates":[90],"computing":[92],"in":[93],"different":[94],"execution":[95],"levels.":[96],"Leveraging":[97],"unified":[99],"intermediate":[101],"representation":[102],"automated":[105],"ML-based":[106],"searching":[107],"algorithm,":[108],"optimal":[109],"schedules":[110],"could":[111,145],"be":[112],"generated":[113],"wisely":[115],"adjust":[116],"model":[117,122],"concurrency":[118],"interleave":[120],"operators,":[123],"maintaining":[124],"continuously":[126],"balanced":[127],"utilization":[129],"across":[130],"entire":[132],"process,":[134],"eventually":[136],"improving":[137],"runtime":[139,157],"efficiency.":[140],"Experiments":[141],"show":[142],"that":[143],"consistently":[146],"achieve":[147],"<tex":[148],"xmlns:mml=\"http://www.w3.org/1998/Math/MathML\"":[149],"xmlns:xlink=\"http://www.w3.org/1999/xlink\">$1.3\\times\\sim":[150],"1.7\\times$</tex>":[151],"speed-up,":[152],"comparing":[153],"regular":[155],"libraries":[158],"(e.g.,":[159,167],"CuDNN,":[160],"TVM)":[161],"particular":[163],"concurrent":[164],"methods":[166],"NVIDIA":[168],"Multi-Stream).":[169]},"counts_by_year":[{"year":2026,"cited_by_count":3},{"year":2025,"cited_by_count":9},{"year":2024,"cited_by_count":18},{"year":2023,"cited_by_count":13},{"year":2021,"cited_by_count":1}],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2025-10-10T00:00:00"}
