{"id":"https://openalex.org/W2582737986","doi":"https://doi.org/10.1109/tpds.2017.2752706","title":"Neurostream: Scalable and Energy Efficient Deep Learning with Smart Memory Cubes","display_name":"Neurostream: Scalable and Energy Efficient Deep Learning with Smart Memory Cubes","publication_year":2017,"publication_date":"2017-09-15","ids":{"openalex":"https://openalex.org/W2582737986","doi":"https://doi.org/10.1109/tpds.2017.2752706","mag":"2582737986"},"language":"en","primary_location":{"id":"doi:10.1109/tpds.2017.2752706","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tpds.2017.2752706","pdf_url":null,"source":{"id":"https://openalex.org/S97130795","display_name":"IEEE Transactions on Parallel and Distributed Systems","issn_l":"1045-9219","issn":["1045-9219","1558-2183","2161-9883"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Parallel and Distributed Systems","raw_type":"journal-article"},"type":"article","indexed_in":["arxiv","crossref"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/1701.06420","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":null,"display_name":"Erfan Azarkhish","orcid":"https://orcid.org/0000-0003-4934-0332"},"institutions":[{"id":"https://openalex.org/I9360294","display_name":"University of Bologna","ror":"https://ror.org/01111rn36","country_code":"IT","type":"education","lineage":["https://openalex.org/I9360294"]}],"countries":["IT"],"is_corresponding":true,"raw_author_name":"Erfan Azarkhish","raw_affiliation_strings":["Department of Electrical, Electronic and Information Engineering, University of Bologna, Bologna, Italy"],"affiliations":[{"raw_affiliation_string":"Department of Electrical, Electronic and Information Engineering, University of Bologna, Bologna, Italy","institution_ids":["https://openalex.org/I9360294"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Davide Rossi","orcid":"https://orcid.org/0000-0002-0651-5393"},"institutions":[{"id":"https://openalex.org/I9360294","display_name":"University of Bologna","ror":"https://ror.org/01111rn36","country_code":"IT","type":"education","lineage":["https://openalex.org/I9360294"]}],"countries":["IT"],"is_corresponding":false,"raw_author_name":"Davide Rossi","raw_affiliation_strings":["Department of Electrical, Electronic and Information Engineering, University of Bologna, Bologna, Italy"],"affiliations":[{"raw_affiliation_string":"Department of Electrical, Electronic and Information Engineering, University of Bologna, Bologna, Italy","institution_ids":["https://openalex.org/I9360294"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Igor Loi","orcid":"https://orcid.org/0000-0003-3852-4662"},"institutions":[{"id":"https://openalex.org/I9360294","display_name":"University of Bologna","ror":"https://ror.org/01111rn36","country_code":"IT","type":"education","lineage":["https://openalex.org/I9360294"]}],"countries":["IT"],"is_corresponding":false,"raw_author_name":"Igor Loi","raw_affiliation_strings":["Department of Electrical, Electronic and Information Engineering, University of Bologna, Bologna, Italy"],"affiliations":[{"raw_affiliation_string":"Department of Electrical, Electronic and Information Engineering, University of Bologna, Bologna, Italy","institution_ids":["https://openalex.org/I9360294"]}]},{"author_position":"last","author":{"id":null,"display_name":"Luca Benini","orcid":null},"institutions":[{"id":"https://openalex.org/I5124864","display_name":"\u00c9cole Polytechnique F\u00e9d\u00e9rale de Lausanne","ror":"https://ror.org/02s376052","country_code":"CH","type":"education","lineage":["https://openalex.org/I2799323385","https://openalex.org/I5124864"]},{"id":"https://openalex.org/I9360294","display_name":"University of Bologna","ror":"https://ror.org/01111rn36","country_code":"IT","type":"education","lineage":["https://openalex.org/I9360294"]}],"countries":["CH","IT"],"is_corresponding":false,"raw_author_name":"Luca Benini","raw_affiliation_strings":["Department of Electrical, Electronic and Information Engineering, University of Bologna, Bologna, Italy","Department of Information Technology and Electrical Engineering, Swiss Federal Institute of Technology Zurich, Zurich, Switzerland"],"affiliations":[{"raw_affiliation_string":"Department of Electrical, Electronic and Information Engineering, University of Bologna, Bologna, Italy","institution_ids":["https://openalex.org/I9360294"]},{"raw_affiliation_string":"Department of Information Technology and Electrical Engineering, Swiss Federal Institute of Technology Zurich, Zurich, Switzerland","institution_ids":["https://openalex.org/I5124864"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":4,"corresponding_author_ids":[],"corresponding_institution_ids":["https://openalex.org/I9360294"],"apc_list":null,"apc_paid":null,"fwci":6.7036,"has_fulltext":false,"cited_by_count":71,"citation_normalized_percentile":{"value":0.97639065,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":96,"max":100},"biblio":{"volume":"29","issue":"2","first_page":"420","last_page":"434"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.6122000217437744,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.6122000217437744,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.0949999988079071,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10502","display_name":"Advanced Memory and Neural Computing","score":0.0471000000834465,"subfield":{"id":"https://openalex.org/subfields/2208","display_name":"Electrical and Electronic Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/scalability","display_name":"Scalability","score":0.7566999793052673},{"id":"https://openalex.org/keywords/flops","display_name":"FLOPS","score":0.6685000061988831},{"id":"https://openalex.org/keywords/coprocessor","display_name":"Coprocessor","score":0.6187000274658203},{"id":"https://openalex.org/keywords/server","display_name":"Server","score":0.5594000220298767},{"id":"https://openalex.org/keywords/efficient-energy-use","display_name":"Efficient energy use","score":0.5568000078201294},{"id":"https://openalex.org/keywords/memory-bandwidth","display_name":"Memory bandwidth","score":0.5439000129699707},{"id":"https://openalex.org/keywords/memory-management","display_name":"Memory management","score":0.4487999975681305},{"id":"https://openalex.org/keywords/modular-design","display_name":"Modular design","score":0.40849998593330383},{"id":"https://openalex.org/keywords/deep-learning","display_name":"Deep learning","score":0.38199999928474426}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8938999772071838},{"id":"https://openalex.org/C48044578","wikidata":"https://www.wikidata.org/wiki/Q727490","display_name":"Scalability","level":2,"score":0.7566999793052673},{"id":"https://openalex.org/C3826847","wikidata":"https://www.wikidata.org/wiki/Q188768","display_name":"FLOPS","level":2,"score":0.6685000061988831},{"id":"https://openalex.org/C86111242","wikidata":"https://www.wikidata.org/wiki/Q859595","display_name":"Coprocessor","level":2,"score":0.6187000274658203},{"id":"https://openalex.org/C93996380","wikidata":"https://www.wikidata.org/wiki/Q44127","display_name":"Server","level":2,"score":0.5594000220298767},{"id":"https://openalex.org/C2742236","wikidata":"https://www.wikidata.org/wiki/Q924713","display_name":"Efficient energy use","level":2,"score":0.5568000078201294},{"id":"https://openalex.org/C188045654","wikidata":"https://www.wikidata.org/wiki/Q17148339","display_name":"Memory bandwidth","level":2,"score":0.5439000129699707},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.5256999731063843},{"id":"https://openalex.org/C176649486","wikidata":"https://www.wikidata.org/wiki/Q2308807","display_name":"Memory management","level":3,"score":0.4487999975681305},{"id":"https://openalex.org/C101468663","wikidata":"https://www.wikidata.org/wiki/Q1620158","display_name":"Modular design","level":2,"score":0.40849998593330383},{"id":"https://openalex.org/C149635348","wikidata":"https://www.wikidata.org/wiki/Q193040","display_name":"Embedded system","level":1,"score":0.4027000069618225},{"id":"https://openalex.org/C108583219","wikidata":"https://www.wikidata.org/wiki/Q197536","display_name":"Deep learning","level":2,"score":0.38199999928474426},{"id":"https://openalex.org/C118524514","wikidata":"https://www.wikidata.org/wiki/Q173212","display_name":"Computer architecture","level":1,"score":0.3776000142097473},{"id":"https://openalex.org/C120314980","wikidata":"https://www.wikidata.org/wiki/Q180634","display_name":"Distributed computing","level":1,"score":0.3693999946117401},{"id":"https://openalex.org/C2781357197","wikidata":"https://www.wikidata.org/wiki/Q5757597","display_name":"High memory","level":2,"score":0.3666999936103821},{"id":"https://openalex.org/C81363708","wikidata":"https://www.wikidata.org/wiki/Q17084460","display_name":"Convolutional neural network","level":2,"score":0.3391999900341034},{"id":"https://openalex.org/C2780165032","wikidata":"https://www.wikidata.org/wiki/Q16869822","display_name":"Energy consumption","level":2,"score":0.3264000117778778},{"id":"https://openalex.org/C2776257435","wikidata":"https://www.wikidata.org/wiki/Q1576430","display_name":"Bandwidth (computing)","level":2,"score":0.31839999556541443},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.3181000053882599},{"id":"https://openalex.org/C74912251","wikidata":"https://www.wikidata.org/wiki/Q6815727","display_name":"Memory footprint","level":2,"score":0.3084999918937683},{"id":"https://openalex.org/C91481028","wikidata":"https://www.wikidata.org/wiki/Q1054686","display_name":"Distributed memory","level":3,"score":0.302700012922287},{"id":"https://openalex.org/C113775141","wikidata":"https://www.wikidata.org/wiki/Q428691","display_name":"Computer engineering","level":1,"score":0.2976999878883362},{"id":"https://openalex.org/C45374587","wikidata":"https://www.wikidata.org/wiki/Q12525525","display_name":"Computation","level":2,"score":0.2935999929904938},{"id":"https://openalex.org/C34165917","wikidata":"https://www.wikidata.org/wiki/Q188267","display_name":"Programming paradigm","level":2,"score":0.2928999960422516},{"id":"https://openalex.org/C26713055","wikidata":"https://www.wikidata.org/wiki/Q245962","display_name":"Implementation","level":2,"score":0.2793999910354614},{"id":"https://openalex.org/C26771161","wikidata":"https://www.wikidata.org/wiki/Q16980","display_name":"ARM architecture","level":2,"score":0.25769999623298645},{"id":"https://openalex.org/C130795937","wikidata":"https://www.wikidata.org/wiki/Q2561570","display_name":"Remote direct memory access","level":2,"score":0.2540000081062317},{"id":"https://openalex.org/C202491316","wikidata":"https://www.wikidata.org/wiki/Q272683","display_name":"Instruction set","level":2,"score":0.2540000081062317}],"mesh":[],"locations_count":3,"locations":[{"id":"doi:10.1109/tpds.2017.2752706","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tpds.2017.2752706","pdf_url":null,"source":{"id":"https://openalex.org/S97130795","display_name":"IEEE Transactions on Parallel and Distributed Systems","issn_l":"1045-9219","issn":["1045-9219","1558-2183","2161-9883"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Parallel and Distributed Systems","raw_type":"journal-article"},{"id":"pmh:oai:arXiv.org:1701.06420","is_oa":true,"landing_page_url":"http://arxiv.org/abs/1701.06420","pdf_url":"https://arxiv.org/pdf/1701.06420","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},{"id":"pmh:oai:cris.unibo.it:11585/624052","is_oa":true,"landing_page_url":"http://hdl.handle.net/11585/624052","pdf_url":null,"source":{"id":"https://openalex.org/S4306402579","display_name":"Archivio istituzionale della ricerca (Alma Mater Studiorum Universit\u00e0 di Bologna)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I4210117483","host_organization_name":"Istituto di Ematologia di Bologna","host_organization_lineage":["https://openalex.org/I4210117483"],"host_organization_lineage_names":[],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"info:eu-repo/semantics/article"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:1701.06420","is_oa":true,"landing_page_url":"http://arxiv.org/abs/1701.06420","pdf_url":"https://arxiv.org/pdf/1701.06420","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G1093674026","display_name":null,"funder_award_id":"agreement","funder_id":"https://openalex.org/F4320320924","funder_display_name":"Schweizerischer Nationalfonds zur F\u00f6rderung der Wissenschaftlichen Forschung"},{"id":"https://openalex.org/G4247904154","display_name":null,"funder_award_id":"European Research Council","funder_id":"https://openalex.org/F4320320924","funder_display_name":"Schweizerischer Nationalfonds zur F\u00f6rderung der Wissenschaftlichen Forschung"},{"id":"https://openalex.org/G4287651321","display_name":null,"funder_award_id":"162524","funder_id":"https://openalex.org/F4320320924","funder_display_name":"Schweizerischer Nationalfonds zur F\u00f6rderung der Wissenschaftlichen Forschung"},{"id":"https://openalex.org/G848032724","display_name":null,"funder_award_id":"Science","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"},{"id":"https://openalex.org/G937299111","display_name":"Integration eines objekteorientierten Datenbanksystems mit einem Application Framework.","funder_award_id":"29112","funder_id":"https://openalex.org/F4320320924","funder_display_name":"Schweizerischer Nationalfonds zur F\u00f6rderung der Wissenschaftlichen Forschung"}],"funders":[{"id":"https://openalex.org/F4320306076","display_name":"National Science Foundation","ror":"https://ror.org/021nxhr62"},{"id":"https://openalex.org/F4320320924","display_name":"Schweizerischer Nationalfonds zur F\u00f6rderung der Wissenschaftlichen Forschung","ror":"https://ror.org/00yjd3n13"}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":48,"referenced_works":["https://openalex.org/W1002967077","https://openalex.org/W1801710398","https://openalex.org/W1981220134","https://openalex.org/W1987882202","https://openalex.org/W2002555321","https://openalex.org/W2019459561","https://openalex.org/W2038679012","https://openalex.org/W2043039667","https://openalex.org/W2046500578","https://openalex.org/W2061768564","https://openalex.org/W2073300972","https://openalex.org/W2081835714","https://openalex.org/W2091902424","https://openalex.org/W2092985495","https://openalex.org/W2097117768","https://openalex.org/W2125203716","https://openalex.org/W2145287260","https://openalex.org/W2151964848","https://openalex.org/W2155893237","https://openalex.org/W2156779053","https://openalex.org/W2173675559","https://openalex.org/W2234163201","https://openalex.org/W2258963578","https://openalex.org/W2285660444","https://openalex.org/W2289252105","https://openalex.org/W2291026910","https://openalex.org/W2291750097","https://openalex.org/W2350819558","https://openalex.org/W2418984255","https://openalex.org/W2508602506","https://openalex.org/W2515080096","https://openalex.org/W2515287984","https://openalex.org/W2518511512","https://openalex.org/W2520083297","https://openalex.org/W2526416132","https://openalex.org/W4244114199","https://openalex.org/W4246587277","https://openalex.org/W4247470470","https://openalex.org/W6614148910","https://openalex.org/W6637151318","https://openalex.org/W6637373629","https://openalex.org/W6655987917","https://openalex.org/W6674479107","https://openalex.org/W6687483927","https://openalex.org/W6693960836","https://openalex.org/W6694517276","https://openalex.org/W6698183232","https://openalex.org/W6725294768"],"related_works":[],"abstract_inverted_index":{"High-performance":[0],"computing":[1],"systems":[2],"are":[3,123],"moving":[4],"towards":[5],"2.5D":[6],"and":[7,17,51,66,108,118,153,208,219],"3D":[8],"memory":[9,26],"hierarchies,":[10],"based":[11,101],"on":[12,102],"High":[13],"Bandwidth":[14],"Memory":[15,19,79],"(HBM)":[16],"Hybrid":[18],"Cube":[20],"(HMC)":[21],"to":[22,35,83,125,225],"mitigate":[23],"the":[24,61,84,143,195,209],"main":[25],"bottlenecks.":[27],"This":[28],"trend":[29],"is":[30,179,191],"also":[31],"creating":[32],"new":[33],"opportunities":[34],"revisit":[36],"near-memory":[37],"computation.":[38],"In":[39,112],"this":[40,128],"paper,":[41],"we":[42],"propose":[43],"a":[44,75,90,98,114,119,132,150,170,182,217,229],"flexible":[45],"processor-in-memory":[46],"(PIM)":[47],"solution":[48],"for":[49,64,161],"scalable":[50,120,224],"energy-efficient":[52],"execution":[53,163],"of":[54,60,74,77,142,158,164,173,232],"deep":[55],"convolutional":[56],"networks":[57],"(ConvNets),":[58],"one":[59],"fastest-growing":[62],"workloads":[63],"servers":[65],"high-end":[67],"embedded":[68],"systems.":[69],"Our":[70],"co-design":[71],"approach":[72],"consists":[73],"network":[76,231],"Smart":[78],"Cubes":[80],"(modular":[81],"extensions":[82],"standard":[85,151],"HMC)":[86],"each":[87],"augmented":[88],"with":[89,131,186,228],"many-core":[91],"PIM":[92,215],"platform":[93],"called":[94],"NeuroCluster.":[95],"NeuroClusters":[96],"have":[97],"modular":[99],"design":[100],"NeuroStream":[103],"coprocessors":[104],"(for":[105],"Convolution-intensive":[106],"computations)":[107],"general-purpose":[109],"RISC-V":[110],"cores.":[111],"addition,":[113],"DRAM-friendly":[115],"tiling":[116],"mechanism":[117],"computation":[121],"paradigm":[122],"presented":[124],"efficiently":[126],"harness":[127],"computational":[129],"capability":[130],"very":[133],"low":[134],"programming":[135],"effort.":[136],"NeuroCluster":[137],"occupies":[138],"only":[139],"8":[140],"percent":[141],"total":[144],"logic-base":[145],"(LoB)":[146],"die":[147],"area":[148,211],"in":[149,181,199,205],"HMC":[152],"achieves":[154],"an":[155],"average":[156],"performance":[157],"240":[159],"GFLOPS":[160,227],"complete":[162],"full-featured":[165],"state-of-the-art":[166],"(SoA)":[167],"ConvNets":[168],"within":[169],"power":[171,207],"budget":[172],"2.5":[174],"W.":[175],"Overall":[176],"11":[177],"W":[178],"consumed":[180],"single":[183],"SMC":[184],"device,":[185],"22.5":[187],"GFLOPS/W":[188],"energy-efficiency":[189],"which":[190],"3.5X":[192],"better":[193],"than":[194],"best":[196],"GPU":[197],"implementations":[198],"similar":[200],"technologies.":[201],"The":[202],"minor":[203],"increase":[204,212],"system-level":[206],"negligible":[210],"make":[213],"our":[214],"system":[216],"cost-effective":[218],"energy":[220],"efficient":[221],"solution,":[222],"easily":[223],"955":[226],"small":[230],"just":[233],"four":[234],"SMCs.":[235]},"counts_by_year":[{"year":2025,"cited_by_count":3},{"year":2024,"cited_by_count":4},{"year":2023,"cited_by_count":5},{"year":2022,"cited_by_count":11},{"year":2021,"cited_by_count":19},{"year":2020,"cited_by_count":9},{"year":2019,"cited_by_count":9},{"year":2018,"cited_by_count":11}],"updated_date":"2026-04-10T15:06:20.359241","created_date":"2017-02-03T00:00:00"}
