{"id":"https://openalex.org/W2941408718","doi":"https://doi.org/10.1109/tcad.2019.2912894","title":"Toward an Efficient Deep Pipelined Template-Based Architecture for Accelerating the Entire 2-D and 3-D CNNs on FPGA","display_name":"Toward an Efficient Deep Pipelined Template-Based Architecture for Accelerating the Entire 2-D and 3-D CNNs on FPGA","publication_year":2019,"publication_date":"2019-04-25","ids":{"openalex":"https://openalex.org/W2941408718","doi":"https://doi.org/10.1109/tcad.2019.2912894","mag":"2941408718"},"language":"en","primary_location":{"id":"doi:10.1109/tcad.2019.2912894","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tcad.2019.2912894","pdf_url":null,"source":{"id":"https://openalex.org/S100835903","display_name":"IEEE Transactions on Computer-Aided Design of Integrated Circuits and Systems","issn_l":"0278-0070","issn":["0278-0070","1937-4151"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Computer-Aided Design of Integrated Circuits and Systems","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5037428152","display_name":"Junzhong Shen","orcid":"https://orcid.org/0000-0001-6233-6800"},"institutions":[{"id":"https://openalex.org/I170215575","display_name":"National University of Defense Technology","ror":"https://ror.org/05d2yfz11","country_code":"CN","type":"education","lineage":["https://openalex.org/I170215575"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Junzhong Shen","raw_affiliation_strings":["College of Computer, National University of Defense Technology, Changsha, China"],"affiliations":[{"raw_affiliation_string":"College of Computer, National University of Defense Technology, Changsha, China","institution_ids":["https://openalex.org/I170215575"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101058401","display_name":"You Huang","orcid":"https://orcid.org/0000-0002-9006-1522"},"institutions":[{"id":"https://openalex.org/I170215575","display_name":"National University of Defense Technology","ror":"https://ror.org/05d2yfz11","country_code":"CN","type":"education","lineage":["https://openalex.org/I170215575"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"You Huang","raw_affiliation_strings":["College of Computer, National University of Defense Technology, Changsha, China"],"affiliations":[{"raw_affiliation_string":"College of Computer, National University of Defense Technology, Changsha, China","institution_ids":["https://openalex.org/I170215575"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101937502","display_name":"Mei Wen","orcid":"https://orcid.org/0000-0002-5875-3297"},"institutions":[{"id":"https://openalex.org/I170215575","display_name":"National University of Defense Technology","ror":"https://ror.org/05d2yfz11","country_code":"CN","type":"education","lineage":["https://openalex.org/I170215575"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Mei Wen","raw_affiliation_strings":["College of Computer, National University of Defense Technology, Changsha, China"],"affiliations":[{"raw_affiliation_string":"College of Computer, National University of Defense Technology, Changsha, China","institution_ids":["https://openalex.org/I170215575"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5100710936","display_name":"Chunyuan Zhang","orcid":"https://orcid.org/0000-0002-0944-2708"},"institutions":[{"id":"https://openalex.org/I170215575","display_name":"National University of Defense Technology","ror":"https://ror.org/05d2yfz11","country_code":"CN","type":"education","lineage":["https://openalex.org/I170215575"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Chunyuan Zhang","raw_affiliation_strings":["College of Computer, National University of Defense Technology, Changsha, China"],"affiliations":[{"raw_affiliation_string":"College of Computer, National University of Defense Technology, Changsha, China","institution_ids":["https://openalex.org/I170215575"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5037428152"],"corresponding_institution_ids":["https://openalex.org/I170215575"],"apc_list":null,"apc_paid":null,"fwci":2.0426,"has_fulltext":false,"cited_by_count":42,"citation_normalized_percentile":{"value":0.89707208,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":96,"max":99},"biblio":{"volume":"39","issue":"7","first_page":"1442","last_page":"1455"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10812","display_name":"Human Pose and Action Recognition","score":0.9988999962806702,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10627","display_name":"Advanced Image and Video Retrieval Techniques","score":0.9984999895095825,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/field-programmable-gate-array","display_name":"Field-programmable gate array","score":0.8213227987289429},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7969179749488831},{"id":"https://openalex.org/keywords/convolutional-neural-network","display_name":"Convolutional neural network","score":0.7856010794639587},{"id":"https://openalex.org/keywords/acceleration","display_name":"Acceleration","score":0.559349536895752},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.536952018737793},{"id":"https://openalex.org/keywords/throughput","display_name":"Throughput","score":0.47184184193611145},{"id":"https://openalex.org/keywords/template","display_name":"Template","score":0.4694103002548218},{"id":"https://openalex.org/keywords/hardware-acceleration","display_name":"Hardware acceleration","score":0.4625700116157532},{"id":"https://openalex.org/keywords/computational-complexity-theory","display_name":"Computational complexity theory","score":0.45064055919647217},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.4263656735420227},{"id":"https://openalex.org/keywords/state","display_name":"State (computer science)","score":0.41486066579818726},{"id":"https://openalex.org/keywords/embedded-system","display_name":"Embedded system","score":0.288719117641449},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.27796825766563416},{"id":"https://openalex.org/keywords/operating-system","display_name":"Operating system","score":0.0794353187084198}],"concepts":[{"id":"https://openalex.org/C42935608","wikidata":"https://www.wikidata.org/wiki/Q190411","display_name":"Field-programmable gate array","level":2,"score":0.8213227987289429},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7969179749488831},{"id":"https://openalex.org/C81363708","wikidata":"https://www.wikidata.org/wiki/Q17084460","display_name":"Convolutional neural network","level":2,"score":0.7856010794639587},{"id":"https://openalex.org/C117896860","wikidata":"https://www.wikidata.org/wiki/Q11376","display_name":"Acceleration","level":2,"score":0.559349536895752},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.536952018737793},{"id":"https://openalex.org/C157764524","wikidata":"https://www.wikidata.org/wiki/Q1383412","display_name":"Throughput","level":3,"score":0.47184184193611145},{"id":"https://openalex.org/C82714645","wikidata":"https://www.wikidata.org/wiki/Q438331","display_name":"Template","level":2,"score":0.4694103002548218},{"id":"https://openalex.org/C13164978","wikidata":"https://www.wikidata.org/wiki/Q600158","display_name":"Hardware acceleration","level":3,"score":0.4625700116157532},{"id":"https://openalex.org/C179799912","wikidata":"https://www.wikidata.org/wiki/Q205084","display_name":"Computational complexity theory","level":2,"score":0.45064055919647217},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4263656735420227},{"id":"https://openalex.org/C48103436","wikidata":"https://www.wikidata.org/wiki/Q599031","display_name":"State (computer science)","level":2,"score":0.41486066579818726},{"id":"https://openalex.org/C149635348","wikidata":"https://www.wikidata.org/wiki/Q193040","display_name":"Embedded system","level":1,"score":0.288719117641449},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.27796825766563416},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0794353187084198},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C555944384","wikidata":"https://www.wikidata.org/wiki/Q249","display_name":"Wireless","level":2,"score":0.0},{"id":"https://openalex.org/C74650414","wikidata":"https://www.wikidata.org/wiki/Q11397","display_name":"Classical mechanics","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/tcad.2019.2912894","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tcad.2019.2912894","pdf_url":null,"source":{"id":"https://openalex.org/S100835903","display_name":"IEEE Transactions on Computer-Aided Design of Integrated Circuits and Systems","issn_l":"0278-0070","issn":["0278-0070","1937-4151"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Computer-Aided Design of Integrated Circuits and Systems","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[{"score":0.8899999856948853,"display_name":"Affordable and clean energy","id":"https://metadata.un.org/sdg/7"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":43,"referenced_works":["https://openalex.org/W1522734439","https://openalex.org/W1667652561","https://openalex.org/W1686810756","https://openalex.org/W1977556410","https://openalex.org/W1983364832","https://openalex.org/W2090210354","https://openalex.org/W2094756095","https://openalex.org/W2155893237","https://openalex.org/W2163605009","https://openalex.org/W2172654076","https://openalex.org/W2194775991","https://openalex.org/W2261808795","https://openalex.org/W2276486856","https://openalex.org/W2294282016","https://openalex.org/W2301358467","https://openalex.org/W2466675884","https://openalex.org/W2520083297","https://openalex.org/W2525740295","https://openalex.org/W2565305208","https://openalex.org/W2574797063","https://openalex.org/W2584311934","https://openalex.org/W2584616277","https://openalex.org/W2585774018","https://openalex.org/W2605487586","https://openalex.org/W2606722458","https://openalex.org/W2625954420","https://openalex.org/W2626616508","https://openalex.org/W2729080111","https://openalex.org/W2767899175","https://openalex.org/W2773339846","https://openalex.org/W2789246071","https://openalex.org/W2794187429","https://openalex.org/W2794754997","https://openalex.org/W2808917878","https://openalex.org/W2962835968","https://openalex.org/W3104246147","https://openalex.org/W4249932213","https://openalex.org/W4302296459","https://openalex.org/W6637151318","https://openalex.org/W6637373629","https://openalex.org/W6674479107","https://openalex.org/W6684191040","https://openalex.org/W6727759651"],"related_works":["https://openalex.org/W2524802307","https://openalex.org/W2466675884","https://openalex.org/W2951390974","https://openalex.org/W2997828269","https://openalex.org/W2160069347","https://openalex.org/W2047588290","https://openalex.org/W4363649491","https://openalex.org/W3185363859","https://openalex.org/W4316658533","https://openalex.org/W2351404747"],"abstract_inverted_index":{"3-D":[0,40,49,74,94,106,135,190,222],"convolutional":[1],"neural":[2],"networks":[3],"(3-D":[4],"CNNs)":[5],"are":[6,76,155],"used":[7],"efficiently":[8,143],"in":[9,17,100,232],"many":[10],"computer":[11],"vision":[12],"applications.":[13],"Most":[14],"previous":[15],"work":[16],"this":[18,98],"area":[19],"has":[20],"concentrated":[21],"only":[22],"on":[23,42,51,123,192],"design":[24],"and":[25,62,73,105,134,161,172,189,214,230,234,241],"optimization":[26],"of":[27,48,71,87,132,142,147,164,180,221,226],"accelerators":[28],"for":[29,85,93,203],"2-D":[30,72,88,104,133,188],"CNNs,":[31],"with":[32,139,212],"few":[33],"attempts":[34],"having":[35],"been":[36,83],"made":[37],"to":[38,53,57,102,127,157,228,237],"accelerate":[39,103],"CNNs":[41,50,75,89,107,149,191],"FPGA.":[43],"We":[44],"find":[45],"the":[46,68,78,124,129,140,159,165,178,181,193],"acceleration":[47,86],"FPGA":[52,195],"be":[54,91],"challenging":[55],"due":[56],"their":[58],"high":[59],"computational":[60,69,162],"complexity":[61],"storage":[63],"demands.":[64],"More":[65],"importantly,":[66],"although":[67],"patterns":[70],"analogous,":[77],"conventional":[79],"approaches":[80],"that":[81,119,218],"have":[82],"adopted":[84],"may":[90],"unfit":[92],"CNN":[95,136,209,223],"acceleration.":[96],"In":[97],"paper,":[99],"order":[101],"using":[108],"a":[109,115,151,238,242,248],"uniform":[110,116],"framework,":[111],"we":[112,176,199],"first":[113],"propose":[114],"template-based":[117],"architecture":[118,184],"uses":[120],"templates":[121],"based":[122],"Winograd":[125],"algorithm":[126],"ensure":[128],"rapid":[130],"development":[131],"accelerators.":[137,210],"Then,":[138],"aim":[141],"mapping":[144],"all":[145],"layers":[146],"2-D/3-D":[148],"onto":[150],"pipelined":[152,183],"accelerator,":[153,166],"techniques":[154],"developed":[156],"improve":[158],"throughput":[160],"efficiency":[163,245],"including":[167],"layer":[168,170],"fusion,":[169],"clustering,":[171],"workload-balancing":[173],"scheme.":[174],"Finally,":[175],"demonstrate":[177,217],"effectiveness":[179],"deep":[182],"by":[185],"accelerating":[186],"real-life":[187],"state-of-the-art":[194,207],"platform.":[196],"On":[197],"VCU118,":[198],"achieve":[200],"3.7":[201],"TOPS":[202],"VGG-16,":[204],"which":[205],"outperforms":[206],"FPGA-based":[208],"Comparisons":[211],"CPU":[213,239],"GPU":[215,249],"solutions":[216],"our":[219],"implementation":[220],"achieves":[224],"gains":[225],"up":[227],"17.8\u00d7":[229],"64.2\u00d7":[231],"performance":[233],"energy":[235,244],"relative":[236],"solution,":[240],"5.0\u00d7":[243],"gain":[246],"over":[247],"solution.":[250]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":4},{"year":2024,"cited_by_count":8},{"year":2023,"cited_by_count":9},{"year":2022,"cited_by_count":3},{"year":2021,"cited_by_count":11},{"year":2020,"cited_by_count":6}],"updated_date":"2026-03-17T09:09:15.849793","created_date":"2025-10-10T00:00:00"}
