{"id":"https://openalex.org/W4294690791","doi":"https://doi.org/10.1109/aicas54282.2022.9869938","title":"An Efficient CNN Training Accelerator Leveraging Transposable Block Sparsity","display_name":"An Efficient CNN Training Accelerator Leveraging Transposable Block Sparsity","publication_year":2022,"publication_date":"2022-06-13","ids":{"openalex":"https://openalex.org/W4294690791","doi":"https://doi.org/10.1109/aicas54282.2022.9869938"},"language":"en","primary_location":{"id":"doi:10.1109/aicas54282.2022.9869938","is_oa":false,"landing_page_url":"https://doi.org/10.1109/aicas54282.2022.9869938","pdf_url":null,"source":{"id":"https://openalex.org/S4363608281","display_name":"2022 IEEE 4th International Conference on Artificial Intelligence Circuits and Systems (AICAS)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2022 IEEE 4th International Conference on Artificial Intelligence Circuits and Systems (AICAS)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5051632607","display_name":"Mingyang Xu","orcid":null},"institutions":[{"id":"https://openalex.org/I881766915","display_name":"Nanjing University","ror":"https://ror.org/01rxvg760","country_code":"CN","type":"education","lineage":["https://openalex.org/I881766915"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Mingyang Xu","raw_affiliation_strings":["School of Electronic Science and Engineering, Nanjing University,Nanjing,China","School of Electronic Science and Engineering, Nanjing University, Nanjing, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"School of Electronic Science and Engineering, Nanjing University,Nanjing,China","institution_ids":["https://openalex.org/I881766915"]},{"raw_affiliation_string":"School of Electronic Science and Engineering, Nanjing University, Nanjing, China","institution_ids":["https://openalex.org/I881766915"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5059107361","display_name":"Jinming Lu","orcid":"https://orcid.org/0000-0002-7134-6514"},"institutions":[{"id":"https://openalex.org/I881766915","display_name":"Nanjing University","ror":"https://ror.org/01rxvg760","country_code":"CN","type":"education","lineage":["https://openalex.org/I881766915"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jinming Lu","raw_affiliation_strings":["School of Electronic Science and Engineering, Nanjing University,Nanjing,China","School of Electronic Science and Engineering, Nanjing University, Nanjing, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"School of Electronic Science and Engineering, Nanjing University,Nanjing,China","institution_ids":["https://openalex.org/I881766915"]},{"raw_affiliation_string":"School of Electronic Science and Engineering, Nanjing University, Nanjing, China","institution_ids":["https://openalex.org/I881766915"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100696999","display_name":"Zhongfeng Wang","orcid":"https://orcid.org/0000-0002-7227-4786"},"institutions":[{"id":"https://openalex.org/I881766915","display_name":"Nanjing University","ror":"https://ror.org/01rxvg760","country_code":"CN","type":"education","lineage":["https://openalex.org/I881766915"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Zhongfeng Wang","raw_affiliation_strings":["School of Electronic Science and Engineering, Nanjing University,Nanjing,China","School of Electronic Science and Engineering, Nanjing University, Nanjing, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"School of Electronic Science and Engineering, Nanjing University,Nanjing,China","institution_ids":["https://openalex.org/I881766915"]},{"raw_affiliation_string":"School of Electronic Science and Engineering, Nanjing University, Nanjing, China","institution_ids":["https://openalex.org/I881766915"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5019940239","display_name":"Jun Lin","orcid":"https://orcid.org/0000-0003-2760-4333"},"institutions":[{"id":"https://openalex.org/I881766915","display_name":"Nanjing University","ror":"https://ror.org/01rxvg760","country_code":"CN","type":"education","lineage":["https://openalex.org/I881766915"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jun Lin","raw_affiliation_strings":["School of Electronic Science and Engineering, Nanjing University,Nanjing,China","School of Electronic Science and Engineering, Nanjing University, Nanjing, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"School of Electronic Science and Engineering, Nanjing University,Nanjing,China","institution_ids":["https://openalex.org/I881766915"]},{"raw_affiliation_string":"School of Electronic Science and Engineering, Nanjing University, Nanjing, China","institution_ids":["https://openalex.org/I881766915"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.177,"has_fulltext":false,"cited_by_count":3,"citation_normalized_percentile":{"value":0.50365585,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":91,"max":96},"biblio":{"volume":null,"issue":null,"first_page":"230","last_page":"233"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.9995999932289124,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.9995999932289124,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10502","display_name":"Advanced Memory and Neural Computing","score":0.9839000105857849,"subfield":{"id":"https://openalex.org/subfields/2208","display_name":"Electrical and Electronic Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12676","display_name":"Machine Learning and ELM","score":0.9836000204086304,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8275116086006165},{"id":"https://openalex.org/keywords/convolutional-neural-network","display_name":"Convolutional neural network","score":0.6700603365898132},{"id":"https://openalex.org/keywords/pruning","display_name":"Pruning","score":0.6676504611968994},{"id":"https://openalex.org/keywords/block","display_name":"Block (permutation group theory)","score":0.6502922773361206},{"id":"https://openalex.org/keywords/field-programmable-gate-array","display_name":"Field-programmable gate array","score":0.5956331491470337},{"id":"https://openalex.org/keywords/systolic-array","display_name":"Systolic array","score":0.5290690064430237},{"id":"https://openalex.org/keywords/throughput","display_name":"Throughput","score":0.49817562103271484},{"id":"https://openalex.org/keywords/hardware-acceleration","display_name":"Hardware acceleration","score":0.4978499412536621},{"id":"https://openalex.org/keywords/training","display_name":"Training (meteorology)","score":0.4797157645225525},{"id":"https://openalex.org/keywords/efficient-energy-use","display_name":"Efficient energy use","score":0.47606557607650757},{"id":"https://openalex.org/keywords/computer-engineering","display_name":"Computer engineering","score":0.441053181886673},{"id":"https://openalex.org/keywords/sparse-matrix","display_name":"Sparse matrix","score":0.43462979793548584},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.40808233618736267},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.33970504999160767},{"id":"https://openalex.org/keywords/computer-hardware","display_name":"Computer hardware","score":0.2836577594280243},{"id":"https://openalex.org/keywords/wireless","display_name":"Wireless","score":0.22459128499031067},{"id":"https://openalex.org/keywords/embedded-system","display_name":"Embedded system","score":0.20182812213897705},{"id":"https://openalex.org/keywords/very-large-scale-integration","display_name":"Very-large-scale integration","score":0.10050526261329651}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8275116086006165},{"id":"https://openalex.org/C81363708","wikidata":"https://www.wikidata.org/wiki/Q17084460","display_name":"Convolutional neural network","level":2,"score":0.6700603365898132},{"id":"https://openalex.org/C108010975","wikidata":"https://www.wikidata.org/wiki/Q500094","display_name":"Pruning","level":2,"score":0.6676504611968994},{"id":"https://openalex.org/C2777210771","wikidata":"https://www.wikidata.org/wiki/Q4927124","display_name":"Block (permutation group theory)","level":2,"score":0.6502922773361206},{"id":"https://openalex.org/C42935608","wikidata":"https://www.wikidata.org/wiki/Q190411","display_name":"Field-programmable gate array","level":2,"score":0.5956331491470337},{"id":"https://openalex.org/C150741067","wikidata":"https://www.wikidata.org/wiki/Q2377218","display_name":"Systolic array","level":3,"score":0.5290690064430237},{"id":"https://openalex.org/C157764524","wikidata":"https://www.wikidata.org/wiki/Q1383412","display_name":"Throughput","level":3,"score":0.49817562103271484},{"id":"https://openalex.org/C13164978","wikidata":"https://www.wikidata.org/wiki/Q600158","display_name":"Hardware acceleration","level":3,"score":0.4978499412536621},{"id":"https://openalex.org/C2777211547","wikidata":"https://www.wikidata.org/wiki/Q17141490","display_name":"Training (meteorology)","level":2,"score":0.4797157645225525},{"id":"https://openalex.org/C2742236","wikidata":"https://www.wikidata.org/wiki/Q924713","display_name":"Efficient energy use","level":2,"score":0.47606557607650757},{"id":"https://openalex.org/C113775141","wikidata":"https://www.wikidata.org/wiki/Q428691","display_name":"Computer engineering","level":1,"score":0.441053181886673},{"id":"https://openalex.org/C56372850","wikidata":"https://www.wikidata.org/wiki/Q1050404","display_name":"Sparse matrix","level":3,"score":0.43462979793548584},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.40808233618736267},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.33970504999160767},{"id":"https://openalex.org/C9390403","wikidata":"https://www.wikidata.org/wiki/Q3966","display_name":"Computer hardware","level":1,"score":0.2836577594280243},{"id":"https://openalex.org/C555944384","wikidata":"https://www.wikidata.org/wiki/Q249","display_name":"Wireless","level":2,"score":0.22459128499031067},{"id":"https://openalex.org/C149635348","wikidata":"https://www.wikidata.org/wiki/Q193040","display_name":"Embedded system","level":1,"score":0.20182812213897705},{"id":"https://openalex.org/C14580979","wikidata":"https://www.wikidata.org/wiki/Q876049","display_name":"Very-large-scale integration","level":2,"score":0.10050526261329651},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0},{"id":"https://openalex.org/C6557445","wikidata":"https://www.wikidata.org/wiki/Q173113","display_name":"Agronomy","level":1,"score":0.0},{"id":"https://openalex.org/C153294291","wikidata":"https://www.wikidata.org/wiki/Q25261","display_name":"Meteorology","level":1,"score":0.0},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.0},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.0},{"id":"https://openalex.org/C119599485","wikidata":"https://www.wikidata.org/wiki/Q43035","display_name":"Electrical engineering","level":1,"score":0.0},{"id":"https://openalex.org/C163716315","wikidata":"https://www.wikidata.org/wiki/Q901177","display_name":"Gaussian","level":2,"score":0.0},{"id":"https://openalex.org/C76155785","wikidata":"https://www.wikidata.org/wiki/Q418","display_name":"Telecommunications","level":1,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C2524010","wikidata":"https://www.wikidata.org/wiki/Q8087","display_name":"Geometry","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/aicas54282.2022.9869938","is_oa":false,"landing_page_url":"https://doi.org/10.1109/aicas54282.2022.9869938","pdf_url":null,"source":{"id":"https://openalex.org/S4363608281","display_name":"2022 IEEE 4th International Conference on Artificial Intelligence Circuits and Systems (AICAS)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2022 IEEE 4th International Conference on Artificial Intelligence Circuits and Systems (AICAS)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"score":0.8999999761581421,"id":"https://metadata.un.org/sdg/7","display_name":"Affordable and clean energy"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":12,"referenced_works":["https://openalex.org/W2119144962","https://openalex.org/W2314470091","https://openalex.org/W2917518248","https://openalex.org/W3016542674","https://openalex.org/W3016832937","https://openalex.org/W3091825101","https://openalex.org/W3102587717","https://openalex.org/W3155573568","https://openalex.org/W3196047573","https://openalex.org/W4287327026","https://openalex.org/W6783565172","https://openalex.org/W6790943123"],"related_works":["https://openalex.org/W2524802307","https://openalex.org/W2951390974","https://openalex.org/W2466675884","https://openalex.org/W2745117541","https://openalex.org/W2295680811","https://openalex.org/W2908855967","https://openalex.org/W3013366513","https://openalex.org/W3159273459","https://openalex.org/W3003304268","https://openalex.org/W3159906349"],"abstract_inverted_index":{"Convolutional":[0],"neural":[1],"network":[2],"(CNN)":[3],"training":[4,37,77,91,147],"is":[5,19,137],"computationally":[6],"intensive,":[7],"requiring":[8],"a":[9,20,32,47,93],"great":[10],"deal":[11],"of":[12,129],"time":[13],"and":[14,56,76,124,132],"resources.":[15],"Exploiting":[16],"data":[17,64],"sparsity":[18],"promising":[21],"method":[22],"to":[23],"ac-celerate":[24],"CNN":[25,90],"training.":[26],"In":[27],"this":[28,81],"work,":[29],"we":[30,84],"propose":[31],"novel":[33],"algorithm":[34],"for":[35,89],"sparse":[36],"processes":[38],"in":[39,46,127],"which":[40],"the":[41,52,62,69,101,107,145],"weight":[42],"matrices":[43],"are":[44],"pruned":[45],"fine-grained":[48],"block-wise":[49],"manner.":[50],"Both":[51],"forward":[53],"propagation":[54,58],"(FP)":[55],"backward":[57],"(BP)":[59],"phases":[60],"use":[61],"identical":[63],"layout.":[65],"It":[66],"can":[67,98],"eliminate":[68],"matrix":[70],"transposition":[71],"procedure,":[72],"reducing":[73],"storage":[74],"space":[75],"time.":[78],"Based":[79],"on":[80],"pruning":[82],"approach,":[83],"developed":[85],"an":[86],"FPGA-based":[87],"accelerator":[88],"using":[92],"systolic":[94],"array.":[95],"The":[96],"architecture":[97],"effectively":[99],"skip":[100],"zero":[102],"values":[103],"calculation":[104],"without":[105],"incurring":[106],"imbalance":[108],"between":[109],"different":[110],"processing":[111],"elements":[112],"(PEs).":[113],"Our":[114,135],"experimental":[115],"results":[116],"indicate":[117],"that":[118],"our":[119],"design":[120,136],"achieves":[121],"1.024":[122],"TOPS":[123],"118.4":[125],"GOPS/W":[126],"terms":[128],"computational":[130],"throughput":[131],"energy":[133,142],"efficiency.":[134],"1.41\u00d7":[138],"~":[139],"4.93\u00d7":[140],"more":[141],"efficient":[143],"than":[144],"state-of-the-art":[146],"accelerator.":[148]},"counts_by_year":[{"year":2025,"cited_by_count":1},{"year":2024,"cited_by_count":2}],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2025-10-10T00:00:00"}
