{"id":"https://openalex.org/W4388505151","doi":"https://doi.org/10.1109/tcad.2023.3331225","title":"Flexible and Efficient Convolutional Acceleration on Unified Hardware Using the Two-Stage Splitting Method and Layer-Adaptive Allocation of 1-D/2-D Winograd Units","display_name":"Flexible and Efficient Convolutional Acceleration on Unified Hardware Using the Two-Stage Splitting Method and Layer-Adaptive Allocation of 1-D/2-D Winograd Units","publication_year":2023,"publication_date":"2023-11-08","ids":{"openalex":"https://openalex.org/W4388505151","doi":"https://doi.org/10.1109/tcad.2023.3331225"},"language":"en","primary_location":{"id":"doi:10.1109/tcad.2023.3331225","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tcad.2023.3331225","pdf_url":null,"source":{"id":"https://openalex.org/S100835903","display_name":"IEEE Transactions on Computer-Aided Design of Integrated Circuits and Systems","issn_l":"0278-0070","issn":["0278-0070","1937-4151"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Computer-Aided Design of Integrated Circuits and Systems","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5100719382","display_name":"Chen Yang","orcid":"https://orcid.org/0000-0002-8221-7670"},"institutions":[{"id":"https://openalex.org/I87445476","display_name":"Xi'an Jiaotong University","ror":"https://ror.org/017zhmm22","country_code":"CN","type":"education","lineage":["https://openalex.org/I87445476"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Chen Yang","raw_affiliation_strings":["School of Microelectronics, Xi&#x2019;an Jiaotong University, Xi&#x2019;an, China"],"raw_orcid":"https://orcid.org/0000-0002-8221-7670","affiliations":[{"raw_affiliation_string":"School of Microelectronics, Xi&#x2019;an Jiaotong University, Xi&#x2019;an, China","institution_ids":["https://openalex.org/I87445476"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5036704783","display_name":"Yaoyao Yang","orcid":"https://orcid.org/0000-0002-9786-1194"},"institutions":[{"id":"https://openalex.org/I87445476","display_name":"Xi'an Jiaotong University","ror":"https://ror.org/017zhmm22","country_code":"CN","type":"education","lineage":["https://openalex.org/I87445476"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yaoyao Yang","raw_affiliation_strings":["School of Microelectronics, Xi&#x2019;an Jiaotong University, Xi&#x2019;an, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"School of Microelectronics, Xi&#x2019;an Jiaotong University, Xi&#x2019;an, China","institution_ids":["https://openalex.org/I87445476"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5046312537","display_name":"Yishuo Meng","orcid":"https://orcid.org/0000-0002-3402-6386"},"institutions":[{"id":"https://openalex.org/I87445476","display_name":"Xi'an Jiaotong University","ror":"https://ror.org/017zhmm22","country_code":"CN","type":"education","lineage":["https://openalex.org/I87445476"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yishuo Meng","raw_affiliation_strings":["School of Microelectronics, Xi&#x2019;an Jiaotong University, Xi&#x2019;an, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"School of Microelectronics, Xi&#x2019;an Jiaotong University, Xi&#x2019;an, China","institution_ids":["https://openalex.org/I87445476"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5058905232","display_name":"Kaibo Huo","orcid":null},"institutions":[{"id":"https://openalex.org/I87445476","display_name":"Xi'an Jiaotong University","ror":"https://ror.org/017zhmm22","country_code":"CN","type":"education","lineage":["https://openalex.org/I87445476"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Kaibo Huo","raw_affiliation_strings":["School of Microelectronics, Xi&#x2019;an Jiaotong University, Xi&#x2019;an, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"School of Microelectronics, Xi&#x2019;an Jiaotong University, Xi&#x2019;an, China","institution_ids":["https://openalex.org/I87445476"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5021185664","display_name":"Siwei Xiang","orcid":null},"institutions":[{"id":"https://openalex.org/I87445476","display_name":"Xi'an Jiaotong University","ror":"https://ror.org/017zhmm22","country_code":"CN","type":"education","lineage":["https://openalex.org/I87445476"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Siwei Xiang","raw_affiliation_strings":["School of Microelectronics, Xi&#x2019;an Jiaotong University, Xi&#x2019;an, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"School of Microelectronics, Xi&#x2019;an Jiaotong University, Xi&#x2019;an, China","institution_ids":["https://openalex.org/I87445476"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5080354346","display_name":"Jianfei Wang","orcid":"https://orcid.org/0009-0004-0132-3319"},"institutions":[{"id":"https://openalex.org/I87445476","display_name":"Xi'an Jiaotong University","ror":"https://ror.org/017zhmm22","country_code":"CN","type":"education","lineage":["https://openalex.org/I87445476"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jianfei Wang","raw_affiliation_strings":["School of Microelectronics, Xi&#x2019;an Jiaotong University, Xi&#x2019;an, China"],"raw_orcid":"https://orcid.org/0009-0004-0132-3319","affiliations":[{"raw_affiliation_string":"School of Microelectronics, Xi&#x2019;an Jiaotong University, Xi&#x2019;an, China","institution_ids":["https://openalex.org/I87445476"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5043300709","display_name":"Li Geng","orcid":"https://orcid.org/0000-0003-4002-9281"},"institutions":[{"id":"https://openalex.org/I87445476","display_name":"Xi'an Jiaotong University","ror":"https://ror.org/017zhmm22","country_code":"CN","type":"education","lineage":["https://openalex.org/I87445476"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Li Geng","raw_affiliation_strings":["School of Microelectronics, Xi&#x2019;an Jiaotong University, Xi&#x2019;an, China"],"raw_orcid":"https://orcid.org/0000-0003-4002-9281","affiliations":[{"raw_affiliation_string":"School of Microelectronics, Xi&#x2019;an Jiaotong University, Xi&#x2019;an, China","institution_ids":["https://openalex.org/I87445476"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":7,"corresponding_author_ids":["https://openalex.org/A5100719382"],"corresponding_institution_ids":["https://openalex.org/I87445476"],"apc_list":null,"apc_paid":null,"fwci":1.0597,"has_fulltext":false,"cited_by_count":9,"citation_normalized_percentile":{"value":0.79779959,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":98,"max":99},"biblio":{"volume":"43","issue":"3","first_page":"919","last_page":"932"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.9987000226974487,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.9987000226974487,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.9736999869346619,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12303","display_name":"Tensor decomposition and applications","score":0.9686999917030334,"subfield":{"id":"https://openalex.org/subfields/2605","display_name":"Computational Mathematics"},"field":{"id":"https://openalex.org/fields/26","display_name":"Mathematics"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/field-programmable-gate-array","display_name":"Field-programmable gate array","score":0.705737292766571},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6273440718650818},{"id":"https://openalex.org/keywords/acceleration","display_name":"Acceleration","score":0.6174574494361877},{"id":"https://openalex.org/keywords/convolution","display_name":"Convolution (computer science)","score":0.5897036790847778},{"id":"https://openalex.org/keywords/digital-signal-processing","display_name":"Digital signal processing","score":0.5291913747787476},{"id":"https://openalex.org/keywords/reduction","display_name":"Reduction (mathematics)","score":0.5122915506362915},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.5006062984466553},{"id":"https://openalex.org/keywords/fast-fourier-transform","display_name":"Fast Fourier transform","score":0.46552225947380066},{"id":"https://openalex.org/keywords/flexibility","display_name":"Flexibility (engineering)","score":0.46273764967918396},{"id":"https://openalex.org/keywords/multiplication","display_name":"Multiplication (music)","score":0.4402885437011719},{"id":"https://openalex.org/keywords/computation","display_name":"Computation","score":0.42462635040283203},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.36566171050071716},{"id":"https://openalex.org/keywords/computer-engineering","display_name":"Computer engineering","score":0.3403969705104828},{"id":"https://openalex.org/keywords/arithmetic","display_name":"Arithmetic","score":0.33102643489837646},{"id":"https://openalex.org/keywords/computer-hardware","display_name":"Computer hardware","score":0.31602221727371216},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.3024240732192993},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.10065597295761108}],"concepts":[{"id":"https://openalex.org/C42935608","wikidata":"https://www.wikidata.org/wiki/Q190411","display_name":"Field-programmable gate array","level":2,"score":0.705737292766571},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6273440718650818},{"id":"https://openalex.org/C117896860","wikidata":"https://www.wikidata.org/wiki/Q11376","display_name":"Acceleration","level":2,"score":0.6174574494361877},{"id":"https://openalex.org/C45347329","wikidata":"https://www.wikidata.org/wiki/Q5166604","display_name":"Convolution (computer science)","level":3,"score":0.5897036790847778},{"id":"https://openalex.org/C84462506","wikidata":"https://www.wikidata.org/wiki/Q173142","display_name":"Digital signal processing","level":2,"score":0.5291913747787476},{"id":"https://openalex.org/C111335779","wikidata":"https://www.wikidata.org/wiki/Q3454686","display_name":"Reduction (mathematics)","level":2,"score":0.5122915506362915},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.5006062984466553},{"id":"https://openalex.org/C75172450","wikidata":"https://www.wikidata.org/wiki/Q623950","display_name":"Fast Fourier transform","level":2,"score":0.46552225947380066},{"id":"https://openalex.org/C2780598303","wikidata":"https://www.wikidata.org/wiki/Q65921492","display_name":"Flexibility (engineering)","level":2,"score":0.46273764967918396},{"id":"https://openalex.org/C2780595030","wikidata":"https://www.wikidata.org/wiki/Q3860309","display_name":"Multiplication (music)","level":2,"score":0.4402885437011719},{"id":"https://openalex.org/C45374587","wikidata":"https://www.wikidata.org/wiki/Q12525525","display_name":"Computation","level":2,"score":0.42462635040283203},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.36566171050071716},{"id":"https://openalex.org/C113775141","wikidata":"https://www.wikidata.org/wiki/Q428691","display_name":"Computer engineering","level":1,"score":0.3403969705104828},{"id":"https://openalex.org/C94375191","wikidata":"https://www.wikidata.org/wiki/Q11205","display_name":"Arithmetic","level":1,"score":0.33102643489837646},{"id":"https://openalex.org/C9390403","wikidata":"https://www.wikidata.org/wiki/Q3966","display_name":"Computer hardware","level":1,"score":0.31602221727371216},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.3024240732192993},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.10065597295761108},{"id":"https://openalex.org/C74650414","wikidata":"https://www.wikidata.org/wiki/Q11397","display_name":"Classical mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C105795698","wikidata":"https://www.wikidata.org/wiki/Q12483","display_name":"Statistics","level":1,"score":0.0},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.0},{"id":"https://openalex.org/C2524010","wikidata":"https://www.wikidata.org/wiki/Q8087","display_name":"Geometry","level":1,"score":0.0},{"id":"https://openalex.org/C114614502","wikidata":"https://www.wikidata.org/wiki/Q76592","display_name":"Combinatorics","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/tcad.2023.3331225","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tcad.2023.3331225","pdf_url":null,"source":{"id":"https://openalex.org/S100835903","display_name":"IEEE Transactions on Computer-Aided Design of Integrated Circuits and Systems","issn_l":"0278-0070","issn":["0278-0070","1937-4151"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Computer-Aided Design of Integrated Circuits and Systems","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[{"display_name":"Affordable and clean energy","score":0.5799999833106995,"id":"https://metadata.un.org/sdg/7"}],"awards":[{"id":"https://openalex.org/G963865091","display_name":null,"funder_award_id":"62176206","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":44,"referenced_works":["https://openalex.org/W1574901103","https://openalex.org/W1686810756","https://openalex.org/W2032524321","https://openalex.org/W2084003578","https://openalex.org/W2172654076","https://openalex.org/W2194775991","https://openalex.org/W2525740295","https://openalex.org/W2616318524","https://openalex.org/W2627042741","https://openalex.org/W2768993447","https://openalex.org/W2792503273","https://openalex.org/W2808463410","https://openalex.org/W2808739938","https://openalex.org/W2913221350","https://openalex.org/W2913573286","https://openalex.org/W2917087921","https://openalex.org/W2964010167","https://openalex.org/W2964525696","https://openalex.org/W2974135602","https://openalex.org/W2977634443","https://openalex.org/W3000160544","https://openalex.org/W3004390962","https://openalex.org/W3018618942","https://openalex.org/W3021847282","https://openalex.org/W3043504674","https://openalex.org/W3114086884","https://openalex.org/W3159322265","https://openalex.org/W3162753525","https://openalex.org/W3171251238","https://openalex.org/W3193946904","https://openalex.org/W3217077065","https://openalex.org/W3217153855","https://openalex.org/W4206551567","https://openalex.org/W4211170619","https://openalex.org/W4226236879","https://openalex.org/W4231896027","https://openalex.org/W4238771304","https://openalex.org/W4289656095","https://openalex.org/W4316022315","https://openalex.org/W4319865680","https://openalex.org/W4360770687","https://openalex.org/W4384788057","https://openalex.org/W6763741422","https://openalex.org/W6790409148"],"related_works":["https://openalex.org/W4327521644","https://openalex.org/W2978884468","https://openalex.org/W3132558499","https://openalex.org/W2005846134","https://openalex.org/W2017990332","https://openalex.org/W2080337923","https://openalex.org/W2169963286","https://openalex.org/W1488776355","https://openalex.org/W2093251826","https://openalex.org/W4244262766"],"abstract_inverted_index":{"General":[0],"convolution":[1],"acceleration,":[2],"such":[3],"as":[4],"Winograd":[5,64,138],"and":[6,54,56,72,134,155,168,177],"FFT,":[7],"is":[8,81,100,130,160],"a":[9,46,57,76,95,108,121,126],"promising":[10],"direction":[11],"to":[12,83,88,102,132,183],"address":[13],"the":[14,24,40,85,90,104,136,164],"computational":[15],"complexity":[16],"of":[17,26,32,92],"current":[18],"convolutional":[19,115],"neural":[20],"networks":[21],"(CNNs).":[22],"However,":[23],"flexibility":[25],"these":[27],"CNNs":[28],"makes":[29],"this":[30,44],"kind":[31],"scheme":[33,99],"always":[34],"introduce":[35],"massive":[36],"redundant":[37,105],"computations,":[38],"damaging":[39],"acceleration":[41],"effect.":[42],"In":[43],"article,":[45],"two-stage":[47],"splitting":[48,98],"method":[49,80,129],"for":[50,172],"arbitrarily":[51],"sized":[52],"tensors":[53,87],"filters":[55],"unified":[58,73,122],"hardware":[59,123],"architecture":[60,124,159],"using":[61],"layer-adaptive":[62,127],"allocated":[63],"units":[65,154],"are":[66],"proposed,":[67],"achieving":[68],"effective":[69],"redundance":[70],"elimination":[71],"architecture.":[74],"First,":[75],"tensor":[77],"adaptive":[78],"presplitting":[79],"proposed":[82,131],"divide":[84],"original":[86],"match":[89],"rule":[91],"Winograd.":[93],"Furthermore,":[94],"Winograd-based":[96],"extended":[97],"designed":[101],"reduce":[103],"calculations;":[106],"therefore,":[107],"substantial":[109],"reduction":[110],"in":[111,114,192],"multiplication":[112],"operations":[113],"layers":[116],"achieved":[117],"30.6%\u201375%":[118],"savings.":[119],"Finally,":[120],"with":[125,196],"allocation":[128],"evaluate":[133],"select":[135],"optimal":[137],"F(":[139],"<inline-formula":[140,147,184],"xmlns:mml=\"http://www.w3.org/1998/Math/MathML\"":[141,148,185],"xmlns:xlink=\"http://www.w3.org/1999/xlink\">":[142,149,186],"<tex-math":[143,150,187],"notation=\"LaTeX\">${m}$":[144],"</tex-math></inline-formula>":[145,152,190],",":[146],"notation=\"LaTeX\">${r}$":[151],")":[153],"input/output":[156],"parallelisms.":[157],"This":[158],"evaluated":[161],"based":[162],"on":[163],"Xilinx":[165],"XCVU9P":[166],"platform":[167],"achieves":[169,181],"1.97/1.23/1.60/1.25":[170],"GOPS/DSP":[171],"AlexNet,":[173],"VGG16,":[174,176],"modified":[175],"ResNet18,":[178],"respectively.":[179],"It":[180],"up":[182],"notation=\"LaTeX\">$5.81\\times":[188],"$":[189],"improvements":[191],"DSP":[193],"efficiency":[194],"compared":[195],"previous":[197],"FPGA-based":[198],"designs.":[199]},"counts_by_year":[{"year":2025,"cited_by_count":9}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
