{"id":"https://openalex.org/W2889354425","doi":"https://doi.org/10.1109/tpds.2018.2868062","title":"Exploiting Parallelism for CNN Applications on 3D Stacked Processing-In-Memory Architecture","display_name":"Exploiting Parallelism for CNN Applications on 3D Stacked Processing-In-Memory Architecture","publication_year":2018,"publication_date":"2018-08-31","ids":{"openalex":"https://openalex.org/W2889354425","doi":"https://doi.org/10.1109/tpds.2018.2868062","mag":"2889354425"},"language":"en","primary_location":{"id":"doi:10.1109/tpds.2018.2868062","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tpds.2018.2868062","pdf_url":null,"source":{"id":"https://openalex.org/S97130795","display_name":"IEEE Transactions on Parallel and Distributed Systems","issn_l":"1045-9219","issn":["1045-9219","1558-2183","2161-9883"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Parallel and Distributed Systems","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5100364961","display_name":"Yi Wang","orcid":"https://orcid.org/0000-0002-5773-3817"},"institutions":[{"id":"https://openalex.org/I180726961","display_name":"Shenzhen University","ror":"https://ror.org/01vy4gh70","country_code":"CN","type":"education","lineage":["https://openalex.org/I180726961"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Yi Wang","raw_affiliation_strings":["College of Computer Science and Software Engineering, Shenzhen University, Shenzhen, China"],"affiliations":[{"raw_affiliation_string":"College of Computer Science and Software Engineering, Shenzhen University, Shenzhen, China","institution_ids":["https://openalex.org/I180726961"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5032587346","display_name":"Weixuan Chen","orcid":"https://orcid.org/0000-0003-2323-5200"},"institutions":[{"id":"https://openalex.org/I180726961","display_name":"Shenzhen University","ror":"https://ror.org/01vy4gh70","country_code":"CN","type":"education","lineage":["https://openalex.org/I180726961"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Weixuan Chen","raw_affiliation_strings":["College of Computer Science and Software Engineering, Shenzhen University, Shenzhen, China"],"affiliations":[{"raw_affiliation_string":"College of Computer Science and Software Engineering, Shenzhen University, Shenzhen, China","institution_ids":["https://openalex.org/I180726961"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101516308","display_name":"Jing Yang","orcid":"https://orcid.org/0009-0000-0157-5971"},"institutions":[{"id":"https://openalex.org/I204983213","display_name":"Harbin Institute of Technology","ror":"https://ror.org/01yqg2h08","country_code":"CN","type":"education","lineage":["https://openalex.org/I204983213"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jing Yang","raw_affiliation_strings":["Experimental and Innovation Practice Center, Harbin Institute of Technology, Shenzhen, China"],"affiliations":[{"raw_affiliation_string":"Experimental and Innovation Practice Center, Harbin Institute of Technology, Shenzhen, China","institution_ids":["https://openalex.org/I204983213"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5100455386","display_name":"Tao Li","orcid":"https://orcid.org/0000-0003-3113-6835"},"institutions":[{"id":"https://openalex.org/I33213144","display_name":"University of Florida","ror":"https://ror.org/02y3ad647","country_code":"US","type":"education","lineage":["https://openalex.org/I33213144"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Tao Li","raw_affiliation_strings":["Department of Electrical and Computer Engineering, University of Florida, Gainesville, FL, USA"],"affiliations":[{"raw_affiliation_string":"Department of Electrical and Computer Engineering, University of Florida, Gainesville, FL, USA","institution_ids":["https://openalex.org/I33213144"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5100364961"],"corresponding_institution_ids":["https://openalex.org/I180726961"],"apc_list":null,"apc_paid":null,"fwci":1.5669,"has_fulltext":false,"cited_by_count":24,"citation_normalized_percentile":{"value":0.87798152,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":91,"max":98},"biblio":{"volume":"30","issue":"3","first_page":"589","last_page":"600"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10502","display_name":"Advanced Memory and Neural Computing","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/2208","display_name":"Electrical and Electronic Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12808","display_name":"Ferroelectric and Negative Capacitance Devices","score":0.9984999895095825,"subfield":{"id":"https://openalex.org/subfields/2208","display_name":"Electrical and Electronic Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.9185729026794434},{"id":"https://openalex.org/keywords/bottleneck","display_name":"Bottleneck","score":0.614778459072113},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.6032593846321106},{"id":"https://openalex.org/keywords/scheduling","display_name":"Scheduling (production processes)","score":0.5562658905982971},{"id":"https://openalex.org/keywords/convolutional-neural-network","display_name":"Convolutional neural network","score":0.5549413561820984},{"id":"https://openalex.org/keywords/cache","display_name":"Cache","score":0.5505783557891846},{"id":"https://openalex.org/keywords/data-parallelism","display_name":"Data parallelism","score":0.4870343804359436},{"id":"https://openalex.org/keywords/computer-architecture","display_name":"Computer architecture","score":0.4726777672767639},{"id":"https://openalex.org/keywords/deep-learning","display_name":"Deep learning","score":0.42657583951950073},{"id":"https://openalex.org/keywords/memory-architecture","display_name":"Memory architecture","score":0.4258323311805725},{"id":"https://openalex.org/keywords/exploit","display_name":"Exploit","score":0.4190569818019867},{"id":"https://openalex.org/keywords/distributed-computing","display_name":"Distributed computing","score":0.3771924674510956},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.29757142066955566},{"id":"https://openalex.org/keywords/embedded-system","display_name":"Embedded system","score":0.22933459281921387},{"id":"https://openalex.org/keywords/parallelism","display_name":"Parallelism (grammar)","score":0.22533854842185974}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.9185729026794434},{"id":"https://openalex.org/C2780513914","wikidata":"https://www.wikidata.org/wiki/Q18210350","display_name":"Bottleneck","level":2,"score":0.614778459072113},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.6032593846321106},{"id":"https://openalex.org/C206729178","wikidata":"https://www.wikidata.org/wiki/Q2271896","display_name":"Scheduling (production processes)","level":2,"score":0.5562658905982971},{"id":"https://openalex.org/C81363708","wikidata":"https://www.wikidata.org/wiki/Q17084460","display_name":"Convolutional neural network","level":2,"score":0.5549413561820984},{"id":"https://openalex.org/C115537543","wikidata":"https://www.wikidata.org/wiki/Q165596","display_name":"Cache","level":2,"score":0.5505783557891846},{"id":"https://openalex.org/C61483411","wikidata":"https://www.wikidata.org/wiki/Q3124522","display_name":"Data parallelism","level":3,"score":0.4870343804359436},{"id":"https://openalex.org/C118524514","wikidata":"https://www.wikidata.org/wiki/Q173212","display_name":"Computer architecture","level":1,"score":0.4726777672767639},{"id":"https://openalex.org/C108583219","wikidata":"https://www.wikidata.org/wiki/Q197536","display_name":"Deep learning","level":2,"score":0.42657583951950073},{"id":"https://openalex.org/C2779602883","wikidata":"https://www.wikidata.org/wiki/Q15544750","display_name":"Memory architecture","level":2,"score":0.4258323311805725},{"id":"https://openalex.org/C165696696","wikidata":"https://www.wikidata.org/wiki/Q11287","display_name":"Exploit","level":2,"score":0.4190569818019867},{"id":"https://openalex.org/C120314980","wikidata":"https://www.wikidata.org/wiki/Q180634","display_name":"Distributed computing","level":1,"score":0.3771924674510956},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.29757142066955566},{"id":"https://openalex.org/C149635348","wikidata":"https://www.wikidata.org/wiki/Q193040","display_name":"Embedded system","level":1,"score":0.22933459281921387},{"id":"https://openalex.org/C2781172179","wikidata":"https://www.wikidata.org/wiki/Q853109","display_name":"Parallelism (grammar)","level":2,"score":0.22533854842185974},{"id":"https://openalex.org/C21547014","wikidata":"https://www.wikidata.org/wiki/Q1423657","display_name":"Operations management","level":1,"score":0.0},{"id":"https://openalex.org/C38652104","wikidata":"https://www.wikidata.org/wiki/Q3510521","display_name":"Computer security","level":1,"score":0.0},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/tpds.2018.2868062","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tpds.2018.2868062","pdf_url":null,"source":{"id":"https://openalex.org/S97130795","display_name":"IEEE Transactions on Parallel and Distributed Systems","issn_l":"1045-9219","issn":["1045-9219","1558-2183","2161-9883"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Parallel and Distributed Systems","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[{"score":0.4000000059604645,"id":"https://metadata.un.org/sdg/7","display_name":"Affordable and clean energy"}],"awards":[{"id":"https://openalex.org/G1274432470","display_name":null,"funder_award_id":"61502309","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G265503101","display_name":null,"funder_award_id":"61702357","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G4257132348","display_name":null,"funder_award_id":"18JCQNJC00300","funder_id":"https://openalex.org/F4320323993","funder_display_name":"Natural Science Foundation of Tianjin City"},{"id":"https://openalex.org/G4957431197","display_name":null,"funder_award_id":"CARCH201608","funder_id":"https://openalex.org/F4320335561","funder_display_name":"Institute of Computing Technology, Chinese Academy of Sciences"},{"id":"https://openalex.org/G6945344831","display_name":null,"funder_award_id":"2017B030314073","funder_id":"https://openalex.org/F4320321921","funder_display_name":"Natural Science Foundation of Guangdong Province"},{"id":"https://openalex.org/G8587239906","display_name":null,"funder_award_id":"2016A030313045","funder_id":"https://openalex.org/F4320321921","funder_display_name":"Natural Science Foundation of Guangdong Province"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"},{"id":"https://openalex.org/F4320321921","display_name":"Natural Science Foundation of Guangdong Province","ror":null},{"id":"https://openalex.org/F4320323993","display_name":"Natural Science Foundation of Tianjin City","ror":null},{"id":"https://openalex.org/F4320335561","display_name":"Institute of Computing Technology, Chinese Academy of Sciences","ror":null}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":48,"referenced_works":["https://openalex.org/W1964311922","https://openalex.org/W1999085092","https://openalex.org/W2015861736","https://openalex.org/W2037984290","https://openalex.org/W2066087320","https://openalex.org/W2075960179","https://openalex.org/W2078141470","https://openalex.org/W2094756095","https://openalex.org/W2097117768","https://openalex.org/W2097998348","https://openalex.org/W2106411961","https://openalex.org/W2117696986","https://openalex.org/W2152839228","https://openalex.org/W2155893237","https://openalex.org/W2163605009","https://openalex.org/W2285660444","https://openalex.org/W2323693848","https://openalex.org/W2335240678","https://openalex.org/W2368124317","https://openalex.org/W2396572963","https://openalex.org/W2396622873","https://openalex.org/W2399948372","https://openalex.org/W2442974303","https://openalex.org/W2474451066","https://openalex.org/W2516141709","https://openalex.org/W2518511512","https://openalex.org/W2519733879","https://openalex.org/W2530749948","https://openalex.org/W2530887700","https://openalex.org/W2545376626","https://openalex.org/W2565305208","https://openalex.org/W2605347906","https://openalex.org/W2608697947","https://openalex.org/W2616014673","https://openalex.org/W2625231790","https://openalex.org/W2625998576","https://openalex.org/W2792682036","https://openalex.org/W2794470368","https://openalex.org/W2794532328","https://openalex.org/W2799257450","https://openalex.org/W2963145956","https://openalex.org/W4249654426","https://openalex.org/W4249932213","https://openalex.org/W6669679407","https://openalex.org/W6674385629","https://openalex.org/W6676179485","https://openalex.org/W6684191040","https://openalex.org/W6750650085"],"related_works":["https://openalex.org/W1657880117","https://openalex.org/W2595172197","https://openalex.org/W1496222301","https://openalex.org/W3207760230","https://openalex.org/W1590307681","https://openalex.org/W4312814274","https://openalex.org/W4285370786","https://openalex.org/W2296488620","https://openalex.org/W2084856301","https://openalex.org/W2358353312"],"abstract_inverted_index":{"Deep":[0],"convolutional":[1,106],"neural":[2,91,107,138],"networks":[3,92,139],"(CNNs)":[4],"are":[5,197],"widely":[6],"adopted":[7],"in":[8,48,56,128],"intelligent":[9],"systems":[10],"with":[11],"unprecedented":[12],"accuracy":[13],"but":[14],"at":[15],"the":[16,26,45,49,57,78,82,86,110,123,135,148,175,178],"cost":[17],"of":[18,22,54,59,65,72,81,90,137,150,177,185,190],"a":[19,94,142,164,183,188],"substantial":[20],"amount":[21],"data":[23,34,160],"movement.":[24],"Although":[25],"emerging":[27],"processing-in-memory":[28],"(PIM)":[29],"architecture":[30,84],"seeks":[31],"to":[32,75,133,145,219],"minimize":[33],"movement":[35],"by":[36,120],"placing":[37],"memory":[38,42],"near":[39],"processing":[40,71,125,212],"elements,":[41],"is":[43,132],"still":[44],"major":[46],"bottleneck":[47],"entire":[50],"system.":[51],"The":[52,130,194],"selection":[53],"hyper-parameters":[55],"training":[58],"CNN":[60,192],"applications":[61],"requires":[62],"over":[63],"hundreds":[64],"kilobytes":[66],"cache":[67,216],"capacity":[68],"for":[69,104,118],"concurrent":[70],"convolutions.":[73],"How":[74],"jointly":[76,146],"explore":[77],"computation":[79,155],"capability":[80],"PIM":[83,111],"and":[85,140,154,168,214],"highly":[87],"parallel":[88],"property":[89],"remains":[93],"critical":[95],"issue.":[96],"This":[97],"paper":[98],"presents":[99],"Para-Net,":[100,180],"that":[101,207],"exploits":[102],"Parallelism":[103],"deterministic":[105],"Networks":[108],"on":[109],"architecture.":[112],"Para-":[113],"Net":[114],"achieves":[115],"data-level":[116],"parallelism":[117],"convolutions":[119],"fully":[121],"utilizing":[122],"on-chip":[124],"engine":[126],"(PE)":[127],"PIM.":[129],"objective":[131],"capture":[134],"characteristics":[136],"present":[141],"hardware-independent":[143],"design":[144],"optimize":[147],"scheduling":[149],"both":[151],"intermediate":[152],"results":[153,205],"tasks.":[156],"We":[157],"formulate":[158],"this":[159],"allocation":[161],"problem":[162],"as":[163],"dynamic":[165],"programming":[166],"model":[167],"obtain":[169],"an":[170],"optimal":[171],"solution.":[172],"To":[173],"demonstrate":[174],"viability":[176],"proposed":[179],"we":[181],"conduct":[182],"set":[184],"experiments":[186],"using":[187],"variety":[189],"realistic":[191],"applications.":[193],"graph":[195],"abstractions":[196],"obtained":[198],"from":[199],"deep":[200],"learning":[201],"framework":[202],"Caffe.":[203],"Experimental":[204],"show":[206],"Para-Net":[208],"can":[209],"significantly":[210],"reduce":[211],"time":[213],"improve":[215],"efficiency":[217],"compared":[218],"representative":[220],"schemes.":[221]},"counts_by_year":[{"year":2025,"cited_by_count":1},{"year":2024,"cited_by_count":2},{"year":2023,"cited_by_count":2},{"year":2022,"cited_by_count":4},{"year":2021,"cited_by_count":6},{"year":2020,"cited_by_count":6},{"year":2019,"cited_by_count":3}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
