{"id":"https://openalex.org/W2489529491","doi":"https://doi.org/10.1109/micro.2016.7783721","title":"vDNN: Virtualized deep neural networks for scalable, memory-efficient neural network design","display_name":"vDNN: Virtualized deep neural networks for scalable, memory-efficient neural network design","publication_year":2016,"publication_date":"2016-10-01","ids":{"openalex":"https://openalex.org/W2489529491","doi":"https://doi.org/10.1109/micro.2016.7783721","mag":"2489529491"},"language":"en","primary_location":{"id":"doi:10.1109/micro.2016.7783721","is_oa":false,"landing_page_url":"https://doi.org/10.1109/micro.2016.7783721","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2016 49th Annual IEEE/ACM International Symposium on Microarchitecture (MICRO)","raw_type":"proceedings-article"},"type":"preprint","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5091648103","display_name":"Minsoo Rhu","orcid":"https://orcid.org/0000-0003-3303-8681"},"institutions":[{"id":"https://openalex.org/I4210127875","display_name":"Nvidia (United States)","ror":"https://ror.org/03jdj4y14","country_code":"US","type":"company","lineage":["https://openalex.org/I4210127875"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Minsoo Rhu","raw_affiliation_strings":["NVIDIA, Santa Clara, CA","NVIDIA, Santa Clara, CA, 95050"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"NVIDIA, Santa Clara, CA","institution_ids":["https://openalex.org/I4210127875"]},{"raw_affiliation_string":"NVIDIA, Santa Clara, CA, 95050","institution_ids":["https://openalex.org/I4210127875"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5078016863","display_name":"Natalia Gimelshein","orcid":"https://orcid.org/0009-0002-9867-5075"},"institutions":[{"id":"https://openalex.org/I4210127875","display_name":"Nvidia (United States)","ror":"https://ror.org/03jdj4y14","country_code":"US","type":"company","lineage":["https://openalex.org/I4210127875"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Natalia Gimelshein","raw_affiliation_strings":["NVIDIA, Santa Clara, CA","NVIDIA, Santa Clara, CA, 95050"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"NVIDIA, Santa Clara, CA","institution_ids":["https://openalex.org/I4210127875"]},{"raw_affiliation_string":"NVIDIA, Santa Clara, CA, 95050","institution_ids":["https://openalex.org/I4210127875"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5056153489","display_name":"Jason Clemons","orcid":"https://orcid.org/0000-0001-5533-417X"},"institutions":[{"id":"https://openalex.org/I4210127875","display_name":"Nvidia (United States)","ror":"https://ror.org/03jdj4y14","country_code":"US","type":"company","lineage":["https://openalex.org/I4210127875"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Jason Clemons","raw_affiliation_strings":["NVIDIA, Santa Clara, CA","NVIDIA, Santa Clara, CA, 95050"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"NVIDIA, Santa Clara, CA","institution_ids":["https://openalex.org/I4210127875"]},{"raw_affiliation_string":"NVIDIA, Santa Clara, CA, 95050","institution_ids":["https://openalex.org/I4210127875"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5065112065","display_name":"Arslan Zulfiqar","orcid":"https://orcid.org/0009-0003-6240-5900"},"institutions":[{"id":"https://openalex.org/I4210127875","display_name":"Nvidia (United States)","ror":"https://ror.org/03jdj4y14","country_code":"US","type":"company","lineage":["https://openalex.org/I4210127875"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Arslan Zulfiqar","raw_affiliation_strings":["NVIDIA, Santa Clara, CA","NVIDIA, Santa Clara, CA, 95050"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"NVIDIA, Santa Clara, CA","institution_ids":["https://openalex.org/I4210127875"]},{"raw_affiliation_string":"NVIDIA, Santa Clara, CA, 95050","institution_ids":["https://openalex.org/I4210127875"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5063354509","display_name":"Stephen W. Keckler","orcid":"https://orcid.org/0000-0001-6701-6099"},"institutions":[{"id":"https://openalex.org/I4210127875","display_name":"Nvidia (United States)","ror":"https://ror.org/03jdj4y14","country_code":"US","type":"company","lineage":["https://openalex.org/I4210127875"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Stephen W. Keckler","raw_affiliation_strings":["NVIDIA, Santa Clara, CA","NVIDIA, Santa Clara, CA, 95050"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"NVIDIA, Santa Clara, CA","institution_ids":["https://openalex.org/I4210127875"]},{"raw_affiliation_string":"NVIDIA, Santa Clara, CA, 95050","institution_ids":["https://openalex.org/I4210127875"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5091648103"],"corresponding_institution_ids":["https://openalex.org/I4210127875"],"apc_list":null,"apc_paid":null,"fwci":12.3355,"has_fulltext":false,"cited_by_count":328,"citation_normalized_percentile":{"value":0.99064543,"is_in_top_1_percent":true,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":90,"max":100},"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"13"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10502","display_name":"Advanced Memory and Neural Computing","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/2208","display_name":"Electrical and Electronic Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12808","display_name":"Ferroelectric and Negative Capacitance Devices","score":0.9993000030517578,"subfield":{"id":"https://openalex.org/subfields/2208","display_name":"Electrical and Electronic Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8698354959487915},{"id":"https://openalex.org/keywords/scalability","display_name":"Scalability","score":0.5871006846427917},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.5828657746315002},{"id":"https://openalex.org/keywords/uniform-memory-access","display_name":"Uniform memory access","score":0.5666656494140625},{"id":"https://openalex.org/keywords/memory-management","display_name":"Memory management","score":0.5175678133964539},{"id":"https://openalex.org/keywords/registered-memory","display_name":"Registered memory","score":0.5110288858413696},{"id":"https://openalex.org/keywords/computer-architecture","display_name":"Computer architecture","score":0.5088508129119873},{"id":"https://openalex.org/keywords/interleaved-memory","display_name":"Interleaved memory","score":0.5069330334663391},{"id":"https://openalex.org/keywords/memory-map","display_name":"Memory map","score":0.49828076362609863},{"id":"https://openalex.org/keywords/artificial-neural-network","display_name":"Artificial neural network","score":0.49674659967422485},{"id":"https://openalex.org/keywords/dram","display_name":"Dram","score":0.4504547715187073},{"id":"https://openalex.org/keywords/flat-memory-model","display_name":"Flat memory model","score":0.422590047121048},{"id":"https://openalex.org/keywords/in-memory-processing","display_name":"In-Memory Processing","score":0.4185270071029663},{"id":"https://openalex.org/keywords/embedded-system","display_name":"Embedded system","score":0.3685275614261627},{"id":"https://openalex.org/keywords/computer-hardware","display_name":"Computer hardware","score":0.3109095096588135},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.25580018758773804},{"id":"https://openalex.org/keywords/shared-memory","display_name":"Shared memory","score":0.24500739574432373},{"id":"https://openalex.org/keywords/semiconductor-memory","display_name":"Semiconductor memory","score":0.2374356985092163},{"id":"https://openalex.org/keywords/operating-system","display_name":"Operating system","score":0.20678013563156128},{"id":"https://openalex.org/keywords/search-engine","display_name":"Search engine","score":0.0832759439945221}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8698354959487915},{"id":"https://openalex.org/C48044578","wikidata":"https://www.wikidata.org/wiki/Q727490","display_name":"Scalability","level":2,"score":0.5871006846427917},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.5828657746315002},{"id":"https://openalex.org/C51290061","wikidata":"https://www.wikidata.org/wiki/Q1936765","display_name":"Uniform memory access","level":4,"score":0.5666656494140625},{"id":"https://openalex.org/C176649486","wikidata":"https://www.wikidata.org/wiki/Q2308807","display_name":"Memory management","level":3,"score":0.5175678133964539},{"id":"https://openalex.org/C93446704","wikidata":"https://www.wikidata.org/wiki/Q449328","display_name":"Registered memory","level":3,"score":0.5110288858413696},{"id":"https://openalex.org/C118524514","wikidata":"https://www.wikidata.org/wiki/Q173212","display_name":"Computer architecture","level":1,"score":0.5088508129119873},{"id":"https://openalex.org/C63511323","wikidata":"https://www.wikidata.org/wiki/Q908936","display_name":"Interleaved memory","level":4,"score":0.5069330334663391},{"id":"https://openalex.org/C74426580","wikidata":"https://www.wikidata.org/wiki/Q719484","display_name":"Memory map","level":3,"score":0.49828076362609863},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.49674659967422485},{"id":"https://openalex.org/C7366592","wikidata":"https://www.wikidata.org/wiki/Q1255620","display_name":"Dram","level":2,"score":0.4504547715187073},{"id":"https://openalex.org/C57863822","wikidata":"https://www.wikidata.org/wiki/Q905488","display_name":"Flat memory model","level":4,"score":0.422590047121048},{"id":"https://openalex.org/C123593499","wikidata":"https://www.wikidata.org/wiki/Q6008583","display_name":"In-Memory Processing","level":5,"score":0.4185270071029663},{"id":"https://openalex.org/C149635348","wikidata":"https://www.wikidata.org/wiki/Q193040","display_name":"Embedded system","level":1,"score":0.3685275614261627},{"id":"https://openalex.org/C9390403","wikidata":"https://www.wikidata.org/wiki/Q3966","display_name":"Computer hardware","level":1,"score":0.3109095096588135},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.25580018758773804},{"id":"https://openalex.org/C133875982","wikidata":"https://www.wikidata.org/wiki/Q764810","display_name":"Shared memory","level":2,"score":0.24500739574432373},{"id":"https://openalex.org/C98986596","wikidata":"https://www.wikidata.org/wiki/Q1143031","display_name":"Semiconductor memory","level":2,"score":0.2374356985092163},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.20678013563156128},{"id":"https://openalex.org/C97854310","wikidata":"https://www.wikidata.org/wiki/Q19541","display_name":"Search engine","level":2,"score":0.0832759439945221},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.0},{"id":"https://openalex.org/C194222762","wikidata":"https://www.wikidata.org/wiki/Q114486","display_name":"Query by Example","level":4,"score":0.0},{"id":"https://openalex.org/C164120249","wikidata":"https://www.wikidata.org/wiki/Q995982","display_name":"Web search query","level":3,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1109/micro.2016.7783721","is_oa":false,"landing_page_url":"https://doi.org/10.1109/micro.2016.7783721","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2016 49th Annual IEEE/ACM International Symposium on Microarchitecture (MICRO)","raw_type":"proceedings-article"},{"id":"pmh:oai:oasis.postech.ac.kr:2014.oak/43284","is_oa":false,"landing_page_url":"https://oasis.postech.ac.kr/handle/2014.oak/43284","pdf_url":null,"source":{"id":"https://openalex.org/S4306401965","display_name":"Open Access System for Information Sharing (Pohang University of Science and Technology)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I123900574","host_organization_name":"Pohang University of Science and Technology","host_organization_lineage":["https://openalex.org/I123900574"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"Conference"}],"best_oa_location":null,"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/7","score":0.4300000071525574,"display_name":"Affordable and clean energy"}],"awards":[],"funders":[{"id":"https://openalex.org/F4320309480","display_name":"Nvidia","ror":"https://ror.org/03jdj4y14"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":61,"referenced_works":["https://openalex.org/W587794757","https://openalex.org/W1598866093","https://openalex.org/W1686810756","https://openalex.org/W1724438581","https://openalex.org/W2048266589","https://openalex.org/W2062430565","https://openalex.org/W2067523571","https://openalex.org/W2079735306","https://openalex.org/W2097117768","https://openalex.org/W2100926301","https://openalex.org/W2112796928","https://openalex.org/W2114766824","https://openalex.org/W2119144962","https://openalex.org/W2120972216","https://openalex.org/W2125389748","https://openalex.org/W2152839228","https://openalex.org/W2163605009","https://openalex.org/W2178615544","https://openalex.org/W2186615578","https://openalex.org/W2194775991","https://openalex.org/W2246760854","https://openalex.org/W2285660444","https://openalex.org/W2289252105","https://openalex.org/W2289543008","https://openalex.org/W2330672121","https://openalex.org/W2331143823","https://openalex.org/W2442974303","https://openalex.org/W2474388053","https://openalex.org/W2511743527","https://openalex.org/W2513554817","https://openalex.org/W2516141709","https://openalex.org/W2518281301","https://openalex.org/W2950179405","https://openalex.org/W2952230511","https://openalex.org/W2962835968","https://openalex.org/W2963542991","https://openalex.org/W2963674932","https://openalex.org/W2964299589","https://openalex.org/W3024621361","https://openalex.org/W4254672563","https://openalex.org/W4302296459","https://openalex.org/W6617368339","https://openalex.org/W6629368666","https://openalex.org/W6635810480","https://openalex.org/W6637151318","https://openalex.org/W6637373629","https://openalex.org/W6637709462","https://openalex.org/W6638632666","https://openalex.org/W6674914833","https://openalex.org/W6677103964","https://openalex.org/W6677580257","https://openalex.org/W6678000929","https://openalex.org/W6678583879","https://openalex.org/W6683738474","https://openalex.org/W6684191040","https://openalex.org/W6685730785","https://openalex.org/W6686509673","https://openalex.org/W6687483927","https://openalex.org/W6691194387","https://openalex.org/W6701947533","https://openalex.org/W6721281333"],"related_works":["https://openalex.org/W2296275612","https://openalex.org/W4248614727","https://openalex.org/W2354036839","https://openalex.org/W2612506697","https://openalex.org/W4232365528","https://openalex.org/W3048967625","https://openalex.org/W2564569739","https://openalex.org/W4321458411","https://openalex.org/W2041174925","https://openalex.org/W3025845664"],"abstract_inverted_index":{"The":[0],"most":[1],"widely":[2],"used":[3],"machine":[4,39],"learning":[5,40],"frameworks":[6],"require":[7],"users":[8],"to":[9,36,44,101,130,150,171,179],"carefully":[10],"tune":[11],"their":[12],"memory":[13,63,68,78,95,114,127,178],"usage":[14,69,96],"so":[15],"that":[16,65,73],"the":[17,24,54,67,92,124,133,181],"deep":[18],"neural":[19],"network":[20,50],"(DNN)":[21],"fits":[22],"into":[23],"DRAM":[25],"capacity":[26],"of":[27,70,97,116,123,135,148,164],"a":[28,33,47,61,110,154,172],"GPU.":[29],"This":[30],"restriction":[31],"hampers":[32],"researcher's":[34],"flexibility":[35],"study":[37],"different":[38],"algorithms,":[41],"forcing":[42],"them":[43],"either":[45],"use":[46],"less":[48],"desirable":[49],"architecture":[51],"or":[52],"parallelize":[53],"processing":[55],"across":[56],"multiple":[57],"GPUs.":[58],"We":[59],"propose":[60],"runtime":[62],"manager":[64],"virtualizes":[66],"DNNs":[71,129],"such":[72],"both":[74],"GPU":[75,94,159,175],"and":[76,106,126],"CPU":[77],"can":[79],"simultaneously":[80],"be":[81,151],"utilized":[82],"for":[83],"training":[84],"larger":[85],"DNNs.":[86,117],"Our":[87],"virtualized":[88],"DNN":[89],"(vDNN)":[90],"reduces":[91],"average":[93],"AlexNet":[98],"by":[99,104,108],"up":[100],"89%,":[102],"OverFeat":[103],"91%,":[105],"GoogLeNet":[107],"95%,":[109],"significant":[111],"reduction":[112],"in":[113],"requirements":[115],"Similar":[118],"experiments":[119],"on":[120,153],"VGG-16,":[121],"one":[122],"deepest":[125],"hungry":[128],"date,":[131],"demonstrate":[132],"memory-efficiency":[134],"our":[136],"proposal.":[137],"vDNN":[138],"enables":[139],"VGG-16":[140],"with":[141,166,176],"batch":[142],"size":[143],"256":[144],"(requiring":[145],"28":[146],"GB":[147,163],"memory)":[149],"trained":[152],"single":[155],"NVIDIA":[156],"Titan":[157],"X":[158],"card":[160],"containing":[161],"12":[162],"memory,":[165],"18%":[167],"performance":[168],"loss":[169],"compared":[170],"hypothetical,":[173],"oracular":[174],"enough":[177],"hold":[180],"entire":[182],"DNN.":[183]},"counts_by_year":[{"year":2026,"cited_by_count":10},{"year":2025,"cited_by_count":30},{"year":2024,"cited_by_count":44},{"year":2023,"cited_by_count":44},{"year":2022,"cited_by_count":42},{"year":2021,"cited_by_count":49},{"year":2020,"cited_by_count":36},{"year":2019,"cited_by_count":25},{"year":2018,"cited_by_count":37},{"year":2017,"cited_by_count":10},{"year":2016,"cited_by_count":1}],"updated_date":"2026-04-25T08:17:42.794288","created_date":"2025-10-10T00:00:00"}
