{"id":"https://openalex.org/W2802258670","doi":"https://doi.org/10.1109/isqed.2018.8357317","title":"High performance training of deep neural networks using pipelined hardware acceleration and distributed memory","display_name":"High performance training of deep neural networks using pipelined hardware acceleration and distributed memory","publication_year":2018,"publication_date":"2018-03-01","ids":{"openalex":"https://openalex.org/W2802258670","doi":"https://doi.org/10.1109/isqed.2018.8357317","mag":"2802258670"},"language":"en","primary_location":{"id":"doi:10.1109/isqed.2018.8357317","is_oa":false,"landing_page_url":"https://doi.org/10.1109/isqed.2018.8357317","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2018 19th International Symposium on Quality Electronic Design (ISQED)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5087624455","display_name":"Raghav Mehta","orcid":"https://orcid.org/0000-0003-0824-5304"},"institutions":[{"id":"https://openalex.org/I4210137693","display_name":"Siemens (United States)","ror":"https://ror.org/04axb7e79","country_code":"US","type":"company","lineage":["https://openalex.org/I1325886976","https://openalex.org/I4210137693"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Raghav Mehta","raw_affiliation_strings":["Mentor, A Siemens Business, Wilsonville, OR, USA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Mentor, A Siemens Business, Wilsonville, OR, USA","institution_ids":["https://openalex.org/I4210137693"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5115599900","display_name":"Yuyang Huang","orcid":"https://orcid.org/0009-0000-7665-8827"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yuyang Huang","raw_affiliation_strings":["Nvidia, Shanghai, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Nvidia, Shanghai, China","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5057367919","display_name":"Mingxi Cheng","orcid":"https://orcid.org/0000-0002-8070-6665"},"institutions":[{"id":"https://openalex.org/I170897317","display_name":"Duke University","ror":"https://ror.org/00py81415","country_code":"US","type":"education","lineage":["https://openalex.org/I170897317"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Mingxi Cheng","raw_affiliation_strings":["Duke University, Durham, NC, USA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Duke University, Durham, NC, USA","institution_ids":["https://openalex.org/I170897317"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5019837261","display_name":"Shrey Bagga","orcid":"https://orcid.org/0000-0002-9218-8340"},"institutions":[{"id":"https://openalex.org/I1174212","display_name":"University of Southern California","ror":"https://ror.org/03taz7m60","country_code":"US","type":"education","lineage":["https://openalex.org/I1174212"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Shrey Bagga","raw_affiliation_strings":["University of Southern California, Los Angeles, CA, USA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"University of Southern California, Los Angeles, CA, USA","institution_ids":["https://openalex.org/I1174212"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5076838307","display_name":"Nishant Mathur","orcid":"https://orcid.org/0000-0002-6670-3273"},"institutions":[{"id":"https://openalex.org/I1174212","display_name":"University of Southern California","ror":"https://ror.org/03taz7m60","country_code":"US","type":"education","lineage":["https://openalex.org/I1174212"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Nishant Mathur","raw_affiliation_strings":["University of Southern California, Los Angeles, CA, USA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"University of Southern California, Los Angeles, CA, USA","institution_ids":["https://openalex.org/I1174212"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100421180","display_name":"Ji Li","orcid":"https://orcid.org/0000-0003-4699-084X"},"institutions":[{"id":"https://openalex.org/I1174212","display_name":"University of Southern California","ror":"https://ror.org/03taz7m60","country_code":"US","type":"education","lineage":["https://openalex.org/I1174212"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Ji Li","raw_affiliation_strings":["University of Southern California, Los Angeles, CA, USA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"University of Southern California, Los Angeles, CA, USA","institution_ids":["https://openalex.org/I1174212"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5110567690","display_name":"Jeffrey Draper","orcid":null},"institutions":[{"id":"https://openalex.org/I1174212","display_name":"University of Southern California","ror":"https://ror.org/03taz7m60","country_code":"US","type":"education","lineage":["https://openalex.org/I1174212"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Jeffrey Draper","raw_affiliation_strings":["University of Southern California, Los Angeles, CA, USA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"University of Southern California, Los Angeles, CA, USA","institution_ids":["https://openalex.org/I1174212"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5065681916","display_name":"Shahin Nazarian","orcid":null},"institutions":[{"id":"https://openalex.org/I1174212","display_name":"University of Southern California","ror":"https://ror.org/03taz7m60","country_code":"US","type":"education","lineage":["https://openalex.org/I1174212"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Shahin Nazarian","raw_affiliation_strings":["University of Southern California, Los Angeles, CA, USA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"University of Southern California, Los Angeles, CA, USA","institution_ids":["https://openalex.org/I1174212"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":8,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.53,"has_fulltext":false,"cited_by_count":6,"citation_normalized_percentile":{"value":0.70619485,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":97},"biblio":{"volume":"7","issue":null,"first_page":"383","last_page":"388"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12676","display_name":"Machine Learning and ELM","score":0.9970999956130981,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11307","display_name":"Domain Adaptation and Few-Shot Learning","score":0.993399977684021,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/mnist-database","display_name":"MNIST database","score":0.8548308610916138},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8453496694564819},{"id":"https://openalex.org/keywords/scalability","display_name":"Scalability","score":0.713944673538208},{"id":"https://openalex.org/keywords/hardware-acceleration","display_name":"Hardware acceleration","score":0.5879918336868286},{"id":"https://openalex.org/keywords/process","display_name":"Process (computing)","score":0.5714160799980164},{"id":"https://openalex.org/keywords/computer-architecture","display_name":"Computer architecture","score":0.5393983721733093},{"id":"https://openalex.org/keywords/artificial-neural-network","display_name":"Artificial neural network","score":0.5365318059921265},{"id":"https://openalex.org/keywords/reduction","display_name":"Reduction (mathematics)","score":0.5095908641815186},{"id":"https://openalex.org/keywords/network-topology","display_name":"Network topology","score":0.44411128759384155},{"id":"https://openalex.org/keywords/software","display_name":"Software","score":0.4401158094406128},{"id":"https://openalex.org/keywords/deep-learning","display_name":"Deep learning","score":0.41727474331855774},{"id":"https://openalex.org/keywords/embedded-system","display_name":"Embedded system","score":0.4118940234184265},{"id":"https://openalex.org/keywords/computer-engineering","display_name":"Computer engineering","score":0.3658652901649475},{"id":"https://openalex.org/keywords/distributed-computing","display_name":"Distributed computing","score":0.33828675746917725},{"id":"https://openalex.org/keywords/computer-hardware","display_name":"Computer hardware","score":0.32711705565452576},{"id":"https://openalex.org/keywords/field-programmable-gate-array","display_name":"Field-programmable gate array","score":0.3001871705055237},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.29620057344436646},{"id":"https://openalex.org/keywords/computer-network","display_name":"Computer network","score":0.1259966492652893},{"id":"https://openalex.org/keywords/operating-system","display_name":"Operating system","score":0.09328094124794006}],"concepts":[{"id":"https://openalex.org/C190502265","wikidata":"https://www.wikidata.org/wiki/Q17069496","display_name":"MNIST database","level":3,"score":0.8548308610916138},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8453496694564819},{"id":"https://openalex.org/C48044578","wikidata":"https://www.wikidata.org/wiki/Q727490","display_name":"Scalability","level":2,"score":0.713944673538208},{"id":"https://openalex.org/C13164978","wikidata":"https://www.wikidata.org/wiki/Q600158","display_name":"Hardware acceleration","level":3,"score":0.5879918336868286},{"id":"https://openalex.org/C98045186","wikidata":"https://www.wikidata.org/wiki/Q205663","display_name":"Process (computing)","level":2,"score":0.5714160799980164},{"id":"https://openalex.org/C118524514","wikidata":"https://www.wikidata.org/wiki/Q173212","display_name":"Computer architecture","level":1,"score":0.5393983721733093},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.5365318059921265},{"id":"https://openalex.org/C111335779","wikidata":"https://www.wikidata.org/wiki/Q3454686","display_name":"Reduction (mathematics)","level":2,"score":0.5095908641815186},{"id":"https://openalex.org/C199845137","wikidata":"https://www.wikidata.org/wiki/Q145490","display_name":"Network topology","level":2,"score":0.44411128759384155},{"id":"https://openalex.org/C2777904410","wikidata":"https://www.wikidata.org/wiki/Q7397","display_name":"Software","level":2,"score":0.4401158094406128},{"id":"https://openalex.org/C108583219","wikidata":"https://www.wikidata.org/wiki/Q197536","display_name":"Deep learning","level":2,"score":0.41727474331855774},{"id":"https://openalex.org/C149635348","wikidata":"https://www.wikidata.org/wiki/Q193040","display_name":"Embedded system","level":1,"score":0.4118940234184265},{"id":"https://openalex.org/C113775141","wikidata":"https://www.wikidata.org/wiki/Q428691","display_name":"Computer engineering","level":1,"score":0.3658652901649475},{"id":"https://openalex.org/C120314980","wikidata":"https://www.wikidata.org/wiki/Q180634","display_name":"Distributed computing","level":1,"score":0.33828675746917725},{"id":"https://openalex.org/C9390403","wikidata":"https://www.wikidata.org/wiki/Q3966","display_name":"Computer hardware","level":1,"score":0.32711705565452576},{"id":"https://openalex.org/C42935608","wikidata":"https://www.wikidata.org/wiki/Q190411","display_name":"Field-programmable gate array","level":2,"score":0.3001871705055237},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.29620057344436646},{"id":"https://openalex.org/C31258907","wikidata":"https://www.wikidata.org/wiki/Q1301371","display_name":"Computer network","level":1,"score":0.1259966492652893},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.09328094124794006},{"id":"https://openalex.org/C2524010","wikidata":"https://www.wikidata.org/wiki/Q8087","display_name":"Geometry","level":1,"score":0.0},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/isqed.2018.8357317","is_oa":false,"landing_page_url":"https://doi.org/10.1109/isqed.2018.8357317","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2018 19th International Symposium on Quality Electronic Design (ISQED)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"score":0.8999999761581421,"id":"https://metadata.un.org/sdg/7","display_name":"Affordable and clean energy"}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":32,"referenced_works":["https://openalex.org/W1598605393","https://openalex.org/W1686810756","https://openalex.org/W1726370773","https://openalex.org/W1981617403","https://openalex.org/W2007339694","https://openalex.org/W2025198378","https://openalex.org/W2048266589","https://openalex.org/W2111072639","https://openalex.org/W2162390675","https://openalex.org/W2163605009","https://openalex.org/W2285660444","https://openalex.org/W2289252105","https://openalex.org/W2402098947","https://openalex.org/W2520083297","https://openalex.org/W2537962049","https://openalex.org/W2549630556","https://openalex.org/W2551814622","https://openalex.org/W2588861206","https://openalex.org/W2593971914","https://openalex.org/W2597215741","https://openalex.org/W2612565436","https://openalex.org/W2616152995","https://openalex.org/W2734213494","https://openalex.org/W2769269352","https://openalex.org/W2919115771","https://openalex.org/W2962676816","https://openalex.org/W3024621361","https://openalex.org/W4211050817","https://openalex.org/W4399637226","https://openalex.org/W6683722107","https://openalex.org/W6684191040","https://openalex.org/W6729063632"],"related_works":["https://openalex.org/W4285144618","https://openalex.org/W2518118925","https://openalex.org/W4285104150","https://openalex.org/W3159273459","https://openalex.org/W4319952061","https://openalex.org/W4280636456","https://openalex.org/W4388913998","https://openalex.org/W4310584535","https://openalex.org/W4295935044","https://openalex.org/W3159906349"],"abstract_inverted_index":{"Recently,":[0],"Deep":[1],"Neural":[2],"Networks":[3],"(DNNs)":[4],"have":[5,39],"made":[6],"unprecedented":[7],"progress":[8],"in":[9,23],"various":[10,79],"tasks.":[11],"However,":[12],"there":[13],"is":[14,51],"a":[15,59,68],"timely":[16],"need":[17],"to":[18,42,71,89,110],"accelerate":[19],"the":[20,44,47,91,116,123],"training":[21,49,95,131],"process":[22,50,132],"DNNs":[24],"specifically":[25],"for":[26,67,130],"real-time":[27],"applications":[28],"that":[29,120],"demand":[30],"high":[31],"performance,":[32],"energy":[33,143],"efficiency":[34],"and":[35,81,100,140],"compactness.":[36],"Numerous":[37],"algorithms":[38,82],"been":[40],"proposed":[41,104,127],"improve":[43],"accuracy,":[45],"however":[46],"network":[48],"computationally":[52],"slow.":[53],"In":[54],"this":[55],"paper,":[56],"we":[57],"present":[58],"scalable":[60],"pipelined":[61],"hardware":[62],"architecture":[63],"with":[64,108,122],"distributed":[65],"memories":[66],"digital":[69],"neuron":[70],"implement":[72],"deep":[73],"neural":[74],"networks.":[75],"We":[76],"also":[77],"explore":[78],"functions":[80],"as":[83,85],"well":[84],"different":[86],"memory":[87],"topologies,":[88],"optimize":[90],"performance":[92],"of":[93,102],"our":[94,103,126],"architecture.":[96],"The":[97],"power,":[98],"area,":[99],"delay":[101],"model":[105],"are":[106],"evaluated":[107],"respect":[109],"software":[111,124],"implementation.":[112],"Experimental":[113],"results":[114],"on":[115],"MNIST":[117],"dataset":[118],"demonstrate":[119],"compared":[121],"training,":[125],"hardware-based":[128],"approach":[129],"achieves":[133],"33X":[134],"runtime":[135],"reduction,":[136,139],"5X":[137],"power":[138],"nearly":[141],"168X":[142],"reduction.":[144]},"counts_by_year":[{"year":2022,"cited_by_count":1},{"year":2021,"cited_by_count":1},{"year":2020,"cited_by_count":1},{"year":2019,"cited_by_count":3}],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2025-10-10T00:00:00"}
