{"id":"https://openalex.org/W2626991402","doi":"https://doi.org/10.1145/3079856.3080244","title":"ScaleDeep","display_name":"ScaleDeep","publication_year":2017,"publication_date":"2017-06-15","ids":{"openalex":"https://openalex.org/W2626991402","doi":"https://doi.org/10.1145/3079856.3080244","mag":"2626991402"},"language":"en","primary_location":{"id":"doi:10.1145/3079856.3080244","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3079856.3080244","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 44th Annual International Symposium on Computer Architecture","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5010094713","display_name":"Swagath Venkataramani","orcid":"https://orcid.org/0000-0002-0470-6364"},"institutions":[{"id":"https://openalex.org/I219193219","display_name":"Purdue University West Lafayette","ror":"https://ror.org/02dqehb95","country_code":"US","type":"education","lineage":["https://openalex.org/I219193219"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Swagath Venkataramani","raw_affiliation_strings":["School of ECE, Purdue University"],"affiliations":[{"raw_affiliation_string":"School of ECE, Purdue University","institution_ids":["https://openalex.org/I219193219"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5047643363","display_name":"Ashish Ranjan","orcid":"https://orcid.org/0000-0003-2434-0475"},"institutions":[{"id":"https://openalex.org/I219193219","display_name":"Purdue University West Lafayette","ror":"https://ror.org/02dqehb95","country_code":"US","type":"education","lineage":["https://openalex.org/I219193219"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Ashish Ranjan","raw_affiliation_strings":["School of ECE, Purdue University"],"affiliations":[{"raw_affiliation_string":"School of ECE, Purdue University","institution_ids":["https://openalex.org/I219193219"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5013922094","display_name":"Subarno Banerjee","orcid":"https://orcid.org/0000-0001-5449-2264"},"institutions":[{"id":"https://openalex.org/I1343180700","display_name":"Intel (United States)","ror":"https://ror.org/01ek73717","country_code":"US","type":"company","lineage":["https://openalex.org/I1343180700"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Subarno Banerjee","raw_affiliation_strings":["Parallel Computing Lab, Intel Corporation"],"affiliations":[{"raw_affiliation_string":"Parallel Computing Lab, Intel Corporation","institution_ids":["https://openalex.org/I1343180700"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5052810907","display_name":"Dipankar Das","orcid":"https://orcid.org/0000-0002-8110-9344"},"institutions":[{"id":"https://openalex.org/I1343180700","display_name":"Intel (United States)","ror":"https://ror.org/01ek73717","country_code":"US","type":"company","lineage":["https://openalex.org/I1343180700"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Dipankar Das","raw_affiliation_strings":["Parallel Computing Lab, Intel Corporation"],"affiliations":[{"raw_affiliation_string":"Parallel Computing Lab, Intel Corporation","institution_ids":["https://openalex.org/I1343180700"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5110786116","display_name":"Sasikanth Avancha","orcid":null},"institutions":[{"id":"https://openalex.org/I1343180700","display_name":"Intel (United States)","ror":"https://ror.org/01ek73717","country_code":"US","type":"company","lineage":["https://openalex.org/I1343180700"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Sasikanth Avancha","raw_affiliation_strings":["Parallel Computing Lab, Intel Corporation"],"affiliations":[{"raw_affiliation_string":"Parallel Computing Lab, Intel Corporation","institution_ids":["https://openalex.org/I1343180700"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5038226209","display_name":"Ashok Jagannathan","orcid":null},"institutions":[{"id":"https://openalex.org/I1343180700","display_name":"Intel (United States)","ror":"https://ror.org/01ek73717","country_code":"US","type":"company","lineage":["https://openalex.org/I1343180700"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Ashok Jagannathan","raw_affiliation_strings":["Parallel Computing Lab, Intel Corporation"],"affiliations":[{"raw_affiliation_string":"Parallel Computing Lab, Intel Corporation","institution_ids":["https://openalex.org/I1343180700"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5026393182","display_name":"Ajaya Durg","orcid":null},"institutions":[{"id":"https://openalex.org/I1343180700","display_name":"Intel (United States)","ror":"https://ror.org/01ek73717","country_code":"US","type":"company","lineage":["https://openalex.org/I1343180700"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Ajaya Durg","raw_affiliation_strings":["Parallel Computing Lab, Intel Corporation"],"affiliations":[{"raw_affiliation_string":"Parallel Computing Lab, Intel Corporation","institution_ids":["https://openalex.org/I1343180700"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5018287086","display_name":"Dheemanth Nagaraj","orcid":null},"institutions":[{"id":"https://openalex.org/I1343180700","display_name":"Intel (United States)","ror":"https://ror.org/01ek73717","country_code":"US","type":"company","lineage":["https://openalex.org/I1343180700"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Dheemanth Nagaraj","raw_affiliation_strings":["Parallel Computing Lab, Intel Corporation"],"affiliations":[{"raw_affiliation_string":"Parallel Computing Lab, Intel Corporation","institution_ids":["https://openalex.org/I1343180700"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5083700279","display_name":"Bharat Kaul","orcid":null},"institutions":[{"id":"https://openalex.org/I1343180700","display_name":"Intel (United States)","ror":"https://ror.org/01ek73717","country_code":"US","type":"company","lineage":["https://openalex.org/I1343180700"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Bharat Kaul","raw_affiliation_strings":["Parallel Computing Lab, Intel Corporation"],"affiliations":[{"raw_affiliation_string":"Parallel Computing Lab, Intel Corporation","institution_ids":["https://openalex.org/I1343180700"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5032238070","display_name":"Pradeep Dubey","orcid":"https://orcid.org/0000-0001-5853-0619"},"institutions":[{"id":"https://openalex.org/I1343180700","display_name":"Intel (United States)","ror":"https://ror.org/01ek73717","country_code":"US","type":"company","lineage":["https://openalex.org/I1343180700"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Pradeep Dubey","raw_affiliation_strings":["Parallel Computing Lab, Intel Corporation"],"affiliations":[{"raw_affiliation_string":"Parallel Computing Lab, Intel Corporation","institution_ids":["https://openalex.org/I1343180700"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5065766721","display_name":"Anand Raghunathan","orcid":"https://orcid.org/0000-0002-4624-564X"},"institutions":[{"id":"https://openalex.org/I219193219","display_name":"Purdue University West Lafayette","ror":"https://ror.org/02dqehb95","country_code":"US","type":"education","lineage":["https://openalex.org/I219193219"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Anand Raghunathan","raw_affiliation_strings":["School of ECE, Purdue University"],"affiliations":[{"raw_affiliation_string":"School of ECE, Purdue University","institution_ids":["https://openalex.org/I219193219"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":11,"corresponding_author_ids":["https://openalex.org/A5010094713"],"corresponding_institution_ids":["https://openalex.org/I219193219"],"apc_list":null,"apc_paid":null,"fwci":9.7393,"has_fulltext":false,"cited_by_count":199,"citation_normalized_percentile":{"value":0.98742163,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":97,"max":100},"biblio":{"volume":null,"issue":null,"first_page":"13","last_page":"26"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10502","display_name":"Advanced Memory and Neural Computing","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/2208","display_name":"Electrical and Electronic Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12808","display_name":"Ferroelectric and Negative Capacitance Devices","score":0.9994000196456909,"subfield":{"id":"https://openalex.org/subfields/2208","display_name":"Electrical and Electronic Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8360121250152588},{"id":"https://openalex.org/keywords/leverage","display_name":"Leverage (statistics)","score":0.5346269607543945},{"id":"https://openalex.org/keywords/computer-architecture","display_name":"Computer architecture","score":0.4861997961997986},{"id":"https://openalex.org/keywords/key","display_name":"Key (lock)","score":0.4340220093727112},{"id":"https://openalex.org/keywords/scalability","display_name":"Scalability","score":0.41444656252861023},{"id":"https://openalex.org/keywords/distributed-computing","display_name":"Distributed computing","score":0.3854065239429474},{"id":"https://openalex.org/keywords/computer-engineering","display_name":"Computer engineering","score":0.36646729707717896},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.33707737922668457},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.15083420276641846}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8360121250152588},{"id":"https://openalex.org/C153083717","wikidata":"https://www.wikidata.org/wiki/Q6535263","display_name":"Leverage (statistics)","level":2,"score":0.5346269607543945},{"id":"https://openalex.org/C118524514","wikidata":"https://www.wikidata.org/wiki/Q173212","display_name":"Computer architecture","level":1,"score":0.4861997961997986},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.4340220093727112},{"id":"https://openalex.org/C48044578","wikidata":"https://www.wikidata.org/wiki/Q727490","display_name":"Scalability","level":2,"score":0.41444656252861023},{"id":"https://openalex.org/C120314980","wikidata":"https://www.wikidata.org/wiki/Q180634","display_name":"Distributed computing","level":1,"score":0.3854065239429474},{"id":"https://openalex.org/C113775141","wikidata":"https://www.wikidata.org/wiki/Q428691","display_name":"Computer engineering","level":1,"score":0.36646729707717896},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.33707737922668457},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.15083420276641846},{"id":"https://openalex.org/C77088390","wikidata":"https://www.wikidata.org/wiki/Q8513","display_name":"Database","level":1,"score":0.0},{"id":"https://openalex.org/C38652104","wikidata":"https://www.wikidata.org/wiki/Q3510521","display_name":"Computer security","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3079856.3080244","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3079856.3080244","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 44th Annual International Symposium on Computer Architecture","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":61,"referenced_works":["https://openalex.org/W1487583988","https://openalex.org/W1598866093","https://openalex.org/W1775434803","https://openalex.org/W1884620995","https://openalex.org/W1902934009","https://openalex.org/W1922655562","https://openalex.org/W1980446076","https://openalex.org/W1998917233","https://openalex.org/W2009832130","https://openalex.org/W2043607059","https://openalex.org/W2044535169","https://openalex.org/W2048266589","https://openalex.org/W2057434193","https://openalex.org/W2060969833","https://openalex.org/W2097117768","https://openalex.org/W2108598243","https://openalex.org/W2117539524","https://openalex.org/W2125203716","https://openalex.org/W2139501017","https://openalex.org/W2152839228","https://openalex.org/W2160815625","https://openalex.org/W2168231600","https://openalex.org/W2170135819","https://openalex.org/W2184045248","https://openalex.org/W2285660444","https://openalex.org/W2287011250","https://openalex.org/W2289252105","https://openalex.org/W2293746900","https://openalex.org/W2311783643","https://openalex.org/W2407022425","https://openalex.org/W2489529491","https://openalex.org/W2513554817","https://openalex.org/W2516141709","https://openalex.org/W2518281301","https://openalex.org/W2560017826","https://openalex.org/W2604272474","https://openalex.org/W2606722458","https://openalex.org/W2618530766","https://openalex.org/W2949245006","https://openalex.org/W2949650786","https://openalex.org/W2950656546","https://openalex.org/W2950967261","https://openalex.org/W2962835968","https://openalex.org/W2962950660","https://openalex.org/W2963374099","https://openalex.org/W2964174152","https://openalex.org/W2964299589","https://openalex.org/W3004171485","https://openalex.org/W3024621361","https://openalex.org/W4243519499","https://openalex.org/W4245199738","https://openalex.org/W4251575795","https://openalex.org/W4251828973","https://openalex.org/W4299553509","https://openalex.org/W4302283059","https://openalex.org/W6600020652","https://openalex.org/W6600113797","https://openalex.org/W6600336938","https://openalex.org/W6628107832","https://openalex.org/W6630431485","https://openalex.org/W6819060087"],"related_works":["https://openalex.org/W2389214306","https://openalex.org/W2965083567","https://openalex.org/W4235240664","https://openalex.org/W1838576100","https://openalex.org/W2095886385","https://openalex.org/W2889616422","https://openalex.org/W2089704382","https://openalex.org/W1983399550","https://openalex.org/W97075385","https://openalex.org/W4401278057"],"abstract_inverted_index":{"Deep":[0],"Neural":[1],"Networks":[2],"(DNNs)":[3],"have":[4,96,209],"demonstrated":[5],"state-of-the-art":[6,283,308],"performance":[7,230,269,309],"on":[8,183,244,310],"a":[9,69,158,178,211,224,267],"broad":[10],"range":[11],"of":[12,40,46,51,64,89,153,240,254,270,296],"tasks":[13],"involving":[14],"natural":[15],"language,":[16],"speech,":[17],"image,":[18],"and":[19,22,43,77,86,134,145,161,172,187,201,223,231,237,265,275,288],"video":[20],"processing,":[21,75],"are":[23,53,80],"deployed":[24],"in":[25,57,99,141,155,175],"many":[26],"real":[27],"world":[28],"applications.":[29],"However,":[30],"DNNs":[31,192,284],"impose":[32],"significant":[33],"computational":[34,142],"challenges":[35],"owing":[36],"to":[37,55,82,114,136,168,190,193,213,218,228,246],"the":[38,41,44,58,62,84,102,138,169,194,297,307],"complexity":[39],"networks":[42],"amount":[45],"data":[47,199],"they":[48],"process,":[49],"both":[50],"which":[52,124],"projected":[54],"grow":[56],"future.":[59],"To":[60],"improve":[61,202],"efficiency":[63,128],"DNNs,":[65,156,176],"we":[66],"propose":[67],"ScaleDeep,":[68,222],"dense,":[70],"scalable":[71],"server":[72],"architecture,":[73],"whose":[74],"memory":[76,159,170],"interconnect":[78,163],"subsystems":[79],"specialized":[81],"leverage":[83],"compute":[85],"communication":[87,173],"characteristics":[88,143],"DNNs.":[90],"While":[91],"several":[92],"DNN":[93,110,216],"accelerator":[94],"designs":[95],"been":[97],"proposed":[98,195],"recent":[100],"years,":[101],"key":[103,120],"difference":[104],"is":[105,166],"that":[106,148,165,197,260],"ScaleDeep":[107,125,255,300],"primarily":[108],"targets":[109],"training,":[111],"as":[112],"opposed":[113],"only":[115],"inference":[116],"or":[117],"evaluation.":[118],"The":[119,233],"architectural":[121,226],"features":[122],"from":[123,293],"derives":[126],"its":[127],"are:":[129],"(i)":[130],"heterogeneous":[131],"processing":[132,258],"tiles":[133,259],"chips":[135],"match":[137],"wide":[139],"diversity":[140],"(FLOPs":[144],"Bytes/FLOP":[146],"ratio)":[147],"manifest":[149],"at":[150,262,279,304],"different":[151],"levels":[152],"granularity":[154],"(ii)":[157],"hierarchy":[160],"3-tiered":[162],"topology":[164,217],"suited":[167],"access":[171],"patterns":[174],"(iii)":[177],"low-overhead":[179],"synchronization":[180],"mechanism":[181],"based":[182,243],"hardware":[184],"data-flow":[185],"trackers,":[186],"(iv)":[188],"methods":[189],"map":[191],"architecture":[196],"minimize":[198],"movement":[200],"core":[203],"utilization":[204],"through":[205],"nested":[206],"pipelining.":[207],"We":[208,250],"developed":[210],"compiler":[212],"allow":[214],"any":[215],"be":[219],"programmed":[220],"onto":[221],"detailed":[225],"simulator":[227,234],"estimate":[229],"energy.":[232],"incorporates":[235],"timing":[236],"power":[238],"models":[239],"ScaleDeep's":[241],"components":[242],"synthesis":[245],"Intel's":[247],"14nm":[248],"technology.":[249],"evaluate":[251],"an":[252],"embodiment":[253],"with":[256],"7032":[257],"operates":[261],"600":[263],"MHz":[264],"has":[266],"peak":[268],"680":[271],"TFLOPs":[272],"(single":[273],"precision)":[274],"1.35":[276],"PFLOPs":[277],"(half-precision)":[278],"1.4KW.":[280],"Across":[281],"11":[282],"containing":[285],"0.65M-14.9M":[286],"neurons":[287],"6.8M-145.9M":[289],"weights,":[290],"including":[291],"winners":[292],"5":[294],"years":[295],"ImageNet":[298],"competition,":[299],"demonstrates":[301],"6x-28x":[302],"speedup":[303],"iso-power":[305],"over":[306],"GPUs.":[311]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":8},{"year":2024,"cited_by_count":9},{"year":2023,"cited_by_count":8},{"year":2022,"cited_by_count":21},{"year":2021,"cited_by_count":45},{"year":2020,"cited_by_count":38},{"year":2019,"cited_by_count":35},{"year":2018,"cited_by_count":29},{"year":2017,"cited_by_count":5}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2017-06-23T00:00:00"}
