{"id":"https://openalex.org/W4386710183","doi":"https://doi.org/10.1145/3615661","title":"An Efficient FPGA-based Depthwise Separable Convolutional Neural Network Accelerator with Hardware Pruning","display_name":"An Efficient FPGA-based Depthwise Separable Convolutional Neural Network Accelerator with Hardware Pruning","publication_year":2023,"publication_date":"2023-09-13","ids":{"openalex":"https://openalex.org/W4386710183","doi":"https://doi.org/10.1145/3615661"},"language":"en","primary_location":{"id":"doi:10.1145/3615661","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3615661","pdf_url":null,"source":{"id":"https://openalex.org/S112809824","display_name":"ACM Transactions on Reconfigurable Technology and Systems","issn_l":"1936-7406","issn":["1936-7406","1936-7414"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM Transactions on Reconfigurable Technology and Systems","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"bronze","oa_url":"https://scholars.cityu.edu.hk/en/publications/an-efficient-fpga-based-depthwise-separable-convolutional-neural-","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5101942287","display_name":"Zhengyan Liu","orcid":"https://orcid.org/0009-0000-6071-6738"},"institutions":[{"id":"https://openalex.org/I162868743","display_name":"Tianjin University","ror":"https://ror.org/012tb2g32","country_code":"CN","type":"education","lineage":["https://openalex.org/I162868743"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Zhengyan Liu","raw_affiliation_strings":["School of Microelectronics, Tianjin University, China"],"affiliations":[{"raw_affiliation_string":"School of Microelectronics, Tianjin University, China","institution_ids":["https://openalex.org/I162868743"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100409523","display_name":"Qiang Liu","orcid":"https://orcid.org/0000-0003-1375-0508"},"institutions":[{"id":"https://openalex.org/I162868743","display_name":"Tianjin University","ror":"https://ror.org/012tb2g32","country_code":"CN","type":"education","lineage":["https://openalex.org/I162868743"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Qiang Liu","raw_affiliation_strings":["School of Microelectronics, Tianjin University, China"],"affiliations":[{"raw_affiliation_string":"School of Microelectronics, Tianjin University, China","institution_ids":["https://openalex.org/I162868743"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101411506","display_name":"Shun Yan","orcid":"https://orcid.org/0000-0003-2468-5799"},"institutions":[{"id":"https://openalex.org/I162868743","display_name":"Tianjin University","ror":"https://ror.org/012tb2g32","country_code":"CN","type":"education","lineage":["https://openalex.org/I162868743"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Shun Yan","raw_affiliation_strings":["School of Microelectronics, Tianjin University, China"],"affiliations":[{"raw_affiliation_string":"School of Microelectronics, Tianjin University, China","institution_ids":["https://openalex.org/I162868743"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5077847474","display_name":"Ray C. C. Cheung","orcid":"https://orcid.org/0000-0002-6764-0729"},"institutions":[{"id":"https://openalex.org/I168719708","display_name":"City University of Hong Kong","ror":"https://ror.org/03q8dnn23","country_code":"HK","type":"education","lineage":["https://openalex.org/I168719708"]}],"countries":["HK"],"is_corresponding":false,"raw_author_name":"Ray C. C. Cheung","raw_affiliation_strings":["Department of Electrical Engineering, City University of Hong Kong"],"affiliations":[{"raw_affiliation_string":"Department of Electrical Engineering, City University of Hong Kong","institution_ids":["https://openalex.org/I168719708"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5101942287"],"corresponding_institution_ids":["https://openalex.org/I162868743"],"apc_list":null,"apc_paid":null,"fwci":2.2617,"has_fulltext":true,"cited_by_count":19,"citation_normalized_percentile":{"value":0.90213094,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":98,"max":99},"biblio":{"volume":"17","issue":"1","first_page":"1","last_page":"20"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11992","display_name":"CCD and CMOS Imaging Sensors","score":0.9991999864578247,"subfield":{"id":"https://openalex.org/subfields/2208","display_name":"Electrical and Electronic Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10502","display_name":"Advanced Memory and Neural Computing","score":0.9969000220298767,"subfield":{"id":"https://openalex.org/subfields/2208","display_name":"Electrical and Electronic Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.9023104906082153},{"id":"https://openalex.org/keywords/field-programmable-gate-array","display_name":"Field-programmable gate array","score":0.7176401615142822},{"id":"https://openalex.org/keywords/convolutional-neural-network","display_name":"Convolutional neural network","score":0.7167296409606934},{"id":"https://openalex.org/keywords/pruning","display_name":"Pruning","score":0.6258687376976013},{"id":"https://openalex.org/keywords/bottleneck","display_name":"Bottleneck","score":0.6098121404647827},{"id":"https://openalex.org/keywords/hardware-acceleration","display_name":"Hardware acceleration","score":0.5043882131576538},{"id":"https://openalex.org/keywords/computer-hardware","display_name":"Computer hardware","score":0.5030092597007751},{"id":"https://openalex.org/keywords/embedded-system","display_name":"Embedded system","score":0.44086745381355286},{"id":"https://openalex.org/keywords/convolution","display_name":"Convolution (computer science)","score":0.425913006067276},{"id":"https://openalex.org/keywords/efficient-energy-use","display_name":"Efficient energy use","score":0.4177859127521515},{"id":"https://openalex.org/keywords/adder","display_name":"Adder","score":0.4106799066066742},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.40448102355003357},{"id":"https://openalex.org/keywords/artificial-neural-network","display_name":"Artificial neural network","score":0.396789014339447},{"id":"https://openalex.org/keywords/computer-engineering","display_name":"Computer engineering","score":0.37197884917259216},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.184093177318573},{"id":"https://openalex.org/keywords/latency","display_name":"Latency (audio)","score":0.18182989954948425}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.9023104906082153},{"id":"https://openalex.org/C42935608","wikidata":"https://www.wikidata.org/wiki/Q190411","display_name":"Field-programmable gate array","level":2,"score":0.7176401615142822},{"id":"https://openalex.org/C81363708","wikidata":"https://www.wikidata.org/wiki/Q17084460","display_name":"Convolutional neural network","level":2,"score":0.7167296409606934},{"id":"https://openalex.org/C108010975","wikidata":"https://www.wikidata.org/wiki/Q500094","display_name":"Pruning","level":2,"score":0.6258687376976013},{"id":"https://openalex.org/C2780513914","wikidata":"https://www.wikidata.org/wiki/Q18210350","display_name":"Bottleneck","level":2,"score":0.6098121404647827},{"id":"https://openalex.org/C13164978","wikidata":"https://www.wikidata.org/wiki/Q600158","display_name":"Hardware acceleration","level":3,"score":0.5043882131576538},{"id":"https://openalex.org/C9390403","wikidata":"https://www.wikidata.org/wiki/Q3966","display_name":"Computer hardware","level":1,"score":0.5030092597007751},{"id":"https://openalex.org/C149635348","wikidata":"https://www.wikidata.org/wiki/Q193040","display_name":"Embedded system","level":1,"score":0.44086745381355286},{"id":"https://openalex.org/C45347329","wikidata":"https://www.wikidata.org/wiki/Q5166604","display_name":"Convolution (computer science)","level":3,"score":0.425913006067276},{"id":"https://openalex.org/C2742236","wikidata":"https://www.wikidata.org/wiki/Q924713","display_name":"Efficient energy use","level":2,"score":0.4177859127521515},{"id":"https://openalex.org/C164620267","wikidata":"https://www.wikidata.org/wiki/Q376953","display_name":"Adder","level":3,"score":0.4106799066066742},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.40448102355003357},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.396789014339447},{"id":"https://openalex.org/C113775141","wikidata":"https://www.wikidata.org/wiki/Q428691","display_name":"Computer engineering","level":1,"score":0.37197884917259216},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.184093177318573},{"id":"https://openalex.org/C82876162","wikidata":"https://www.wikidata.org/wiki/Q17096504","display_name":"Latency (audio)","level":2,"score":0.18182989954948425},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.0},{"id":"https://openalex.org/C76155785","wikidata":"https://www.wikidata.org/wiki/Q418","display_name":"Telecommunications","level":1,"score":0.0},{"id":"https://openalex.org/C6557445","wikidata":"https://www.wikidata.org/wiki/Q173113","display_name":"Agronomy","level":1,"score":0.0},{"id":"https://openalex.org/C119599485","wikidata":"https://www.wikidata.org/wiki/Q43035","display_name":"Electrical engineering","level":1,"score":0.0}],"mesh":[],"locations_count":3,"locations":[{"id":"doi:10.1145/3615661","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3615661","pdf_url":null,"source":{"id":"https://openalex.org/S112809824","display_name":"ACM Transactions on Reconfigurable Technology and Systems","issn_l":"1936-7406","issn":["1936-7406","1936-7414"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM Transactions on Reconfigurable Technology and Systems","raw_type":"journal-article"},{"is_oa":true,"landing_page_url":"https://scholars.cityu.edu.hk/en/publications/an-efficient-fpga-based-depthwise-separable-convolutional-neural-","pdf_url":null,"source":{"id":"https://openalex.org/S112809824","display_name":"ACM Transactions on Reconfigurable Technology and Systems","issn_l":"1936-7406","issn":["1936-7406","1936-7414"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM Transactions on Reconfigurable Technology and Systems","raw_type":"journal-article"},{"id":"pmh:oai:pure.atira.dk:publications/95cafd33-cff7-43ad-8747-97742d96b9c6","is_oa":true,"landing_page_url":"https://hdl.handle.net/2031/95cafd33-cff7-43ad-8747-97742d96b9c6","pdf_url":"https://scholars.cityu.edu.hk/files/266856693/237781924.pdf","source":{"id":"https://openalex.org/S7407055387","display_name":"CityU Scholars","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"LIU, Z, LIU, Q, YAN, S & CHEUNG, R C C 2024, 'An Efficient FPGA-based Depthwise Separable Convolutional Neural Network Accelerator with Hardware Pruning', ACM Transactions on Reconfigurable Technology and Systems, vol. 17, no. 1, 15. https://doi.org/10.1145/3615661","raw_type":"article"}],"best_oa_location":{"is_oa":true,"landing_page_url":"https://scholars.cityu.edu.hk/en/publications/an-efficient-fpga-based-depthwise-separable-convolutional-neural-","pdf_url":null,"source":{"id":"https://openalex.org/S112809824","display_name":"ACM Transactions on Reconfigurable Technology and Systems","issn_l":"1936-7406","issn":["1936-7406","1936-7414"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM Transactions on Reconfigurable Technology and Systems","raw_type":"journal-article"},"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/7","display_name":"Affordable and clean energy","score":0.5099999904632568}],"awards":[{"id":"https://openalex.org/G6803541034","display_name":null,"funder_award_id":"U21B2031","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":13,"referenced_works":["https://openalex.org/W2602816542","https://openalex.org/W2618530766","https://openalex.org/W2761085955","https://openalex.org/W2887936511","https://openalex.org/W2913221350","https://openalex.org/W2919512338","https://openalex.org/W2963163009","https://openalex.org/W3012561096","https://openalex.org/W3162993841","https://openalex.org/W3215511279","https://openalex.org/W4236965008","https://openalex.org/W4301409532","https://openalex.org/W4407953972"],"related_works":["https://openalex.org/W2595172197","https://openalex.org/W4390550886","https://openalex.org/W2084856301","https://openalex.org/W2127970246","https://openalex.org/W3217463396","https://openalex.org/W2885125400","https://openalex.org/W1989889224","https://openalex.org/W4382618745","https://openalex.org/W1973775000","https://openalex.org/W2748922771"],"abstract_inverted_index":{"Convolutional":[0],"neural":[1],"networks":[2],"(CNNs)":[3],"have":[4],"been":[5],"widely":[6],"deployed":[7],"in":[8],"computer":[9],"vision":[10],"tasks.":[11],"However,":[12],"the":[13,50,93,138,148,164,185],"computation":[14],"and":[15,68,114,129,142,150,153,158],"resource":[16],"intensive":[17],"characteristics":[18],"of":[19,79],"CNN":[20],"bring":[21],"obstacles":[22],"to":[23,75,97,107,122],"its":[24],"application":[25],"on":[26,36,171],"embedded":[27],"systems.":[28],"This":[29],"article":[30],"proposes":[31],"an":[32,58],"efficient":[33,59],"inference":[34],"accelerator":[35,51,139],"Field":[37],"Programmable":[38],"Gate":[39],"Array":[40],"(FPGA)":[41],"for":[42,65,92,136],"CNNs":[43],"with":[44,62,87],"depthwise":[45],"separable":[46],"convolutions.":[47],"To":[48],"improve":[49],"efficiency,":[52],"we":[53],"make":[54],"four":[55],"contributions:":[56],"(1)":[57],"convolution":[60,80],"engine":[61],"multiple":[63],"strategies":[64],"exploiting":[66],"parallelism":[67],"a":[69,83,103,116],"configurable":[70],"adder":[71],"tree":[72],"are":[73],"designed":[74,91],"support":[76,123],"three":[77],"types":[78],"operations;":[81],"(2)":[82],"dedicated":[84],"architecture":[85],"combined":[86],"input":[88],"buffers":[89],"is":[90,112,120,163],"bottleneck":[94],"network":[95],"structure":[96],"reduce":[98,180],"data":[99],"transmission":[100],"time;":[101],"(3)":[102],"hardware":[104],"padding":[105,110],"scheme":[106],"eliminate":[108],"invalid":[109],"operations":[111],"proposed;":[113],"(4)":[115],"hardware-assisted":[117,175],"pruning":[118,176],"method":[119,177],"developed":[121],"online":[124],"tradeoff":[125],"between":[126],"model":[127],"accuracy":[128,186],"power":[130,182],"consumption.":[131],"Experimental":[132],"results":[133],"show":[134],"that":[135,162],"MobileNetV2":[137],"achieves":[140],"10\u00d7":[141],"6\u00d7":[143],"energy":[144],"efficiency":[145],"improvement":[146],"over":[147],"CPU":[149],"GPU":[151],"implementation,":[152],"302.3":[154],"frames":[155],"per":[156],"second":[157],"181.8":[159],"GOPS":[160],"performance":[161],"best":[165],"among":[166],"several":[167],"existing":[168],"single-engine":[169],"accelerators":[170],"FPGAs.":[172],"The":[173],"proposed":[174],"can":[178],"effectively":[179],"59.7%":[181],"consumption":[183],"at":[184],"loss":[187],"within":[188],"5%.":[189]},"counts_by_year":[{"year":2026,"cited_by_count":2},{"year":2025,"cited_by_count":11},{"year":2024,"cited_by_count":6}],"updated_date":"2026-04-14T08:04:32.555800","created_date":"2025-10-10T00:00:00"}
