{"id":"https://openalex.org/W2796360442","doi":"https://doi.org/10.1109/lca.2018.2823302","title":"A Case for Memory-Centric HPC System Architecture for Training Deep Neural Networks","display_name":"A Case for Memory-Centric HPC System Architecture for Training Deep Neural Networks","publication_year":2018,"publication_date":"2018-04-06","ids":{"openalex":"https://openalex.org/W2796360442","doi":"https://doi.org/10.1109/lca.2018.2823302","mag":"2796360442"},"language":"en","primary_location":{"id":"doi:10.1109/lca.2018.2823302","is_oa":false,"landing_page_url":"https://doi.org/10.1109/lca.2018.2823302","pdf_url":null,"source":{"id":"https://openalex.org/S17643076","display_name":"IEEE Computer Architecture Letters","issn_l":"1556-6056","issn":["1556-6056","1556-6064","2473-2575"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Computer Architecture Letters","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5037670571","display_name":"Youngeun Kwon","orcid":"https://orcid.org/0000-0002-4020-8995"},"institutions":[{"id":"https://openalex.org/I123900574","display_name":"Pohang University of Science and Technology","ror":"https://ror.org/04xysgw12","country_code":"KR","type":"education","lineage":["https://openalex.org/I123900574"]}],"countries":["KR"],"is_corresponding":true,"raw_author_name":"Youngeun Kwon","raw_affiliation_strings":["Pohang University of Science and Technology, Pohang, Gyeongsangbuk-do, South Korea"],"affiliations":[{"raw_affiliation_string":"Pohang University of Science and Technology, Pohang, Gyeongsangbuk-do, South Korea","institution_ids":["https://openalex.org/I123900574"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5091648103","display_name":"Minsoo Rhu","orcid":"https://orcid.org/0000-0003-3303-8681"},"institutions":[{"id":"https://openalex.org/I123900574","display_name":"Pohang University of Science and Technology","ror":"https://ror.org/04xysgw12","country_code":"KR","type":"education","lineage":["https://openalex.org/I123900574"]}],"countries":["KR"],"is_corresponding":false,"raw_author_name":"Minsoo Rhu","raw_affiliation_strings":["Pohang University of Science and Technology, Pohang, Gyeongsangbuk-do, South Korea"],"affiliations":[{"raw_affiliation_string":"Pohang University of Science and Technology, Pohang, Gyeongsangbuk-do, South Korea","institution_ids":["https://openalex.org/I123900574"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5037670571"],"corresponding_institution_ids":["https://openalex.org/I123900574"],"apc_list":null,"apc_paid":null,"fwci":4.0395,"has_fulltext":false,"cited_by_count":21,"citation_normalized_percentile":{"value":0.94426791,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":91,"max":99},"biblio":{"volume":"17","issue":"2","first_page":"134","last_page":"138"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.9988999962806702,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12808","display_name":"Ferroelectric and Negative Capacitance Devices","score":0.9988999962806702,"subfield":{"id":"https://openalex.org/subfields/2208","display_name":"Electrical and Electronic Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8497022390365601},{"id":"https://openalex.org/keywords/bottleneck","display_name":"Bottleneck","score":0.8454203605651855},{"id":"https://openalex.org/keywords/speedup","display_name":"Speedup","score":0.6325971484184265},{"id":"https://openalex.org/keywords/memory-management","display_name":"Memory management","score":0.5310084223747253},{"id":"https://openalex.org/keywords/computer-architecture","display_name":"Computer architecture","score":0.5162324905395508},{"id":"https://openalex.org/keywords/interface","display_name":"Interface (matter)","score":0.44025516510009766},{"id":"https://openalex.org/keywords/deep-learning","display_name":"Deep learning","score":0.43321558833122253},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.42462098598480225},{"id":"https://openalex.org/keywords/auxiliary-memory","display_name":"Auxiliary memory","score":0.4232247471809387},{"id":"https://openalex.org/keywords/shared-memory","display_name":"Shared memory","score":0.4115951359272003},{"id":"https://openalex.org/keywords/distributed-computing","display_name":"Distributed computing","score":0.3664991855621338},{"id":"https://openalex.org/keywords/embedded-system","display_name":"Embedded system","score":0.3518820106983185},{"id":"https://openalex.org/keywords/semiconductor-memory","display_name":"Semiconductor memory","score":0.2809644043445587},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.24613797664642334},{"id":"https://openalex.org/keywords/computer-hardware","display_name":"Computer hardware","score":0.24182093143463135}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8497022390365601},{"id":"https://openalex.org/C2780513914","wikidata":"https://www.wikidata.org/wiki/Q18210350","display_name":"Bottleneck","level":2,"score":0.8454203605651855},{"id":"https://openalex.org/C68339613","wikidata":"https://www.wikidata.org/wiki/Q1549489","display_name":"Speedup","level":2,"score":0.6325971484184265},{"id":"https://openalex.org/C176649486","wikidata":"https://www.wikidata.org/wiki/Q2308807","display_name":"Memory management","level":3,"score":0.5310084223747253},{"id":"https://openalex.org/C118524514","wikidata":"https://www.wikidata.org/wiki/Q173212","display_name":"Computer architecture","level":1,"score":0.5162324905395508},{"id":"https://openalex.org/C113843644","wikidata":"https://www.wikidata.org/wiki/Q901882","display_name":"Interface (matter)","level":4,"score":0.44025516510009766},{"id":"https://openalex.org/C108583219","wikidata":"https://www.wikidata.org/wiki/Q197536","display_name":"Deep learning","level":2,"score":0.43321558833122253},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.42462098598480225},{"id":"https://openalex.org/C82687282","wikidata":"https://www.wikidata.org/wiki/Q66221","display_name":"Auxiliary memory","level":2,"score":0.4232247471809387},{"id":"https://openalex.org/C133875982","wikidata":"https://www.wikidata.org/wiki/Q764810","display_name":"Shared memory","level":2,"score":0.4115951359272003},{"id":"https://openalex.org/C120314980","wikidata":"https://www.wikidata.org/wiki/Q180634","display_name":"Distributed computing","level":1,"score":0.3664991855621338},{"id":"https://openalex.org/C149635348","wikidata":"https://www.wikidata.org/wiki/Q193040","display_name":"Embedded system","level":1,"score":0.3518820106983185},{"id":"https://openalex.org/C98986596","wikidata":"https://www.wikidata.org/wiki/Q1143031","display_name":"Semiconductor memory","level":2,"score":0.2809644043445587},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.24613797664642334},{"id":"https://openalex.org/C9390403","wikidata":"https://www.wikidata.org/wiki/Q3966","display_name":"Computer hardware","level":1,"score":0.24182093143463135},{"id":"https://openalex.org/C157915830","wikidata":"https://www.wikidata.org/wiki/Q2928001","display_name":"Bubble","level":2,"score":0.0},{"id":"https://openalex.org/C129307140","wikidata":"https://www.wikidata.org/wiki/Q6795880","display_name":"Maximum bubble pressure method","level":3,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1109/lca.2018.2823302","is_oa":false,"landing_page_url":"https://doi.org/10.1109/lca.2018.2823302","pdf_url":null,"source":{"id":"https://openalex.org/S17643076","display_name":"IEEE Computer Architecture Letters","issn_l":"1556-6056","issn":["1556-6056","1556-6064","2473-2575"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Computer Architecture Letters","raw_type":"journal-article"},{"id":"pmh:oai:oasis.postech.ac.kr:2014.oak/99268","is_oa":false,"landing_page_url":"https://oasis.postech.ac.kr/handle/2014.oak/99268","pdf_url":null,"source":{"id":"https://openalex.org/S4306401965","display_name":"Open Access System for Information Sharing (Pohang University of Science and Technology)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I123900574","host_organization_name":"Pohang University of Science and Technology","host_organization_lineage":["https://openalex.org/I123900574"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"Article"}],"best_oa_location":null,"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/9","display_name":"Industry, innovation and infrastructure","score":0.5199999809265137}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":10,"referenced_works":["https://openalex.org/W2007511350","https://openalex.org/W2009571103","https://openalex.org/W2022655850","https://openalex.org/W2161047342","https://openalex.org/W2163605009","https://openalex.org/W2489529491","https://openalex.org/W2964174152","https://openalex.org/W4256373055","https://openalex.org/W6655987917","https://openalex.org/W6684191040"],"related_works":["https://openalex.org/W2540591044","https://openalex.org/W1509211761","https://openalex.org/W2391299576","https://openalex.org/W1531488649","https://openalex.org/W2171734863","https://openalex.org/W3166325303","https://openalex.org/W2103460560","https://openalex.org/W51964040","https://openalex.org/W1492654723","https://openalex.org/W2845028610"],"abstract_inverted_index":{"As":[0],"the":[1,4,24,29,34,38,55,60,81,88,118],"models":[2,11],"and":[3,91,116],"datasets":[5],"to":[6,59,102,122],"train":[7],"deep":[8,48],"learning":[9,49],"(DL)":[10],"scale,":[12],"system":[13,50],"architects":[14],"are":[15,85],"faced":[16],"with":[17],"new":[18],"challenges,":[19],"one":[20],"of":[21,76,124],"which":[22,84],"is":[23],"memory":[25,32,56,77,98,120],"capacity":[26,57,99,121],"bottleneck,":[27],"where":[28],"limited":[30],"physical":[31],"inside":[33],"accelerator":[35],"device":[36],"constrains":[37],"algorithm":[39],"that":[40,51],"can":[41,52],"be":[42],"studied.":[43],"We":[44],"propose":[45],"a":[46,74,94],"memory-centric":[47],"transparently":[53],"expand":[54],"accessible":[58],"accelerators":[61],"while":[62],"also":[63],"providing":[64],"fast":[65],"inter-device":[66],"communication":[67],"for":[68,96],"parallel":[69],"training.":[70],"Our":[71],"proposal":[72,106],"aggregates":[73],"pool":[75],"modules":[78],"locally":[79],"within":[80],"device-side":[82],"interconnect,":[83],"decoupled":[86],"from":[87],"host":[89],"interface":[90],"function":[92],"as":[93],"vehicle":[95],"transparent":[97],"expansion.":[100],"Compared":[101],"conventional":[103],"systems,":[104],"our":[105],"achieves":[107],"an":[108],"average":[109],"2:1\u00d7":[110],"speedup":[111],"on":[112],"eight":[113],"DL":[114],"applications":[115],"increases":[117],"system-wide":[119],"tens":[123],"TBs.":[125]},"counts_by_year":[{"year":2025,"cited_by_count":1},{"year":2023,"cited_by_count":2},{"year":2022,"cited_by_count":2},{"year":2021,"cited_by_count":4},{"year":2020,"cited_by_count":4},{"year":2019,"cited_by_count":8}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
