{"id":"https://openalex.org/W4391387876","doi":"https://doi.org/10.1145/3643682","title":"Winols: A Large-Tiling Sparse Winograd CNN Accelerator on FPGAs","display_name":"Winols: A Large-Tiling Sparse Winograd CNN Accelerator on FPGAs","publication_year":2024,"publication_date":"2024-01-31","ids":{"openalex":"https://openalex.org/W4391387876","doi":"https://doi.org/10.1145/3643682"},"language":"en","primary_location":{"id":"doi:10.1145/3643682","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3643682","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3643682","source":{"id":"https://openalex.org/S26056741","display_name":"ACM Transactions on Architecture and Code Optimization","issn_l":"1544-3566","issn":["1544-3566","1544-3973"],"is_oa":true,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM Transactions on Architecture and Code Optimization","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"diamond","oa_url":"https://dl.acm.org/doi/pdf/10.1145/3643682","any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5102770065","display_name":"Kunpeng Xie","orcid":"https://orcid.org/0000-0001-9857-5352"},"institutions":[{"id":"https://openalex.org/I205237279","display_name":"Nankai University","ror":"https://ror.org/01y1kjr75","country_code":"CN","type":"education","lineage":["https://openalex.org/I205237279"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Kunpeng Xie","raw_affiliation_strings":["Nankai University, Tianjin Key Laboratory of Network and Data Security Technology, and the Key Laboratory of Data and Intelligent System Security, Ministry of Education, Tianjin, China"],"raw_orcid":"https://orcid.org/0000-0001-9857-5352","affiliations":[{"raw_affiliation_string":"Nankai University, Tianjin Key Laboratory of Network and Data Security Technology, and the Key Laboratory of Data and Intelligent System Security, Ministry of Education, Tianjin, China","institution_ids":["https://openalex.org/I205237279"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5023557586","display_name":"Ye Lu","orcid":"https://orcid.org/0000-0003-0805-6394"},"institutions":[{"id":"https://openalex.org/I205237279","display_name":"Nankai University","ror":"https://ror.org/01y1kjr75","country_code":"CN","type":"education","lineage":["https://openalex.org/I205237279"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Ye Lu","raw_affiliation_strings":["Nankai University, Tianjin Key Laboratory of Network and Data Security Technology, and the Key Laboratory of Data and Intelligent System Security, Ministry of Education, Tianjin, China"],"raw_orcid":"https://orcid.org/0000-0003-0805-6394","affiliations":[{"raw_affiliation_string":"Nankai University, Tianjin Key Laboratory of Network and Data Security Technology, and the Key Laboratory of Data and Intelligent System Security, Ministry of Education, Tianjin, China","institution_ids":["https://openalex.org/I205237279"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5084053097","display_name":"Xinyu He","orcid":"https://orcid.org/0000-0001-9540-2093"},"institutions":[{"id":"https://openalex.org/I205237279","display_name":"Nankai University","ror":"https://ror.org/01y1kjr75","country_code":"CN","type":"education","lineage":["https://openalex.org/I205237279"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xinyu He","raw_affiliation_strings":["Nankai University, Tianjin Key Laboratory of Network and Data Security Technology, and the Key Laboratory of Data and Intelligent System Security, Ministry of Education, Tianjin, China"],"raw_orcid":"https://orcid.org/0000-0001-9540-2093","affiliations":[{"raw_affiliation_string":"Nankai University, Tianjin Key Laboratory of Network and Data Security Technology, and the Key Laboratory of Data and Intelligent System Security, Ministry of Education, Tianjin, China","institution_ids":["https://openalex.org/I205237279"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5092966843","display_name":"Dezhi Yi","orcid":"https://orcid.org/0009-0004-3553-734X"},"institutions":[{"id":"https://openalex.org/I205237279","display_name":"Nankai University","ror":"https://ror.org/01y1kjr75","country_code":"CN","type":"education","lineage":["https://openalex.org/I205237279"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Dezhi Yi","raw_affiliation_strings":["Nankai University, Tianjin Key Laboratory of Network and Data Security Technology, and the Key Laboratory of Data and Intelligent System Security, Ministry of Education, Tianjin, China"],"raw_orcid":"https://orcid.org/0009-0004-3553-734X","affiliations":[{"raw_affiliation_string":"Nankai University, Tianjin Key Laboratory of Network and Data Security Technology, and the Key Laboratory of Data and Intelligent System Security, Ministry of Education, Tianjin, China","institution_ids":["https://openalex.org/I205237279"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100389362","display_name":"Huijuan Dong","orcid":null},"institutions":[{"id":"https://openalex.org/I205237279","display_name":"Nankai University","ror":"https://ror.org/01y1kjr75","country_code":"CN","type":"education","lineage":["https://openalex.org/I205237279"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Huijuan Dong","raw_affiliation_strings":["Nankai University, Tianjin Key Laboratory of Network and Data Security Technology, and the Key Laboratory of Data and Intelligent System Security, Ministry of Education, Tianjin, China"],"raw_orcid":"https://orcid.org/0009-0009-0397-4875","affiliations":[{"raw_affiliation_string":"Nankai University, Tianjin Key Laboratory of Network and Data Security Technology, and the Key Laboratory of Data and Intelligent System Security, Ministry of Education, Tianjin, China","institution_ids":["https://openalex.org/I205237279"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5100394256","display_name":"Yao Chen","orcid":"https://orcid.org/0000-0002-5798-2282"},"institutions":[{"id":"https://openalex.org/I165932596","display_name":"National University of Singapore","ror":"https://ror.org/01tgyzw49","country_code":"SG","type":"education","lineage":["https://openalex.org/I165932596"]}],"countries":["SG"],"is_corresponding":false,"raw_author_name":"Yao Chen","raw_affiliation_strings":["National University of Singapore, Singapore, Singapore"],"raw_orcid":"https://orcid.org/0000-0002-5798-2282","affiliations":[{"raw_affiliation_string":"National University of Singapore, Singapore, Singapore","institution_ids":["https://openalex.org/I165932596"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5102770065"],"corresponding_institution_ids":["https://openalex.org/I205237279"],"apc_list":null,"apc_paid":null,"fwci":1.727,"has_fulltext":true,"cited_by_count":9,"citation_normalized_percentile":{"value":0.83780758,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":94,"max":99},"biblio":{"volume":"21","issue":"2","first_page":"1","last_page":"24"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11992","display_name":"CCD and CMOS Imaging Sensors","score":0.996999979019165,"subfield":{"id":"https://openalex.org/subfields/2208","display_name":"Electrical and Electronic Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11992","display_name":"CCD and CMOS Imaging Sensors","score":0.996999979019165,"subfield":{"id":"https://openalex.org/subfields/2208","display_name":"Electrical and Electronic Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10502","display_name":"Advanced Memory and Neural Computing","score":0.9921000003814697,"subfield":{"id":"https://openalex.org/subfields/2208","display_name":"Electrical and Electronic Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12162","display_name":"Cellular Automata and Applications","score":0.9793000221252441,"subfield":{"id":"https://openalex.org/subfields/1703","display_name":"Computational Theory and Mathematics"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8463223576545715},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.6053279638290405},{"id":"https://openalex.org/keywords/field-programmable-gate-array","display_name":"Field-programmable gate array","score":0.5613878965377808},{"id":"https://openalex.org/keywords/computational-science","display_name":"Computational science","score":0.45922771096229553},{"id":"https://openalex.org/keywords/computer-graphics","display_name":"Computer graphics (images)","score":0.38341131806373596},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.34334927797317505},{"id":"https://openalex.org/keywords/theoretical-computer-science","display_name":"Theoretical computer science","score":0.342970073223114},{"id":"https://openalex.org/keywords/computer-hardware","display_name":"Computer hardware","score":0.2558513879776001}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8463223576545715},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.6053279638290405},{"id":"https://openalex.org/C42935608","wikidata":"https://www.wikidata.org/wiki/Q190411","display_name":"Field-programmable gate array","level":2,"score":0.5613878965377808},{"id":"https://openalex.org/C459310","wikidata":"https://www.wikidata.org/wiki/Q117801","display_name":"Computational science","level":1,"score":0.45922771096229553},{"id":"https://openalex.org/C121684516","wikidata":"https://www.wikidata.org/wiki/Q7600677","display_name":"Computer graphics (images)","level":1,"score":0.38341131806373596},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.34334927797317505},{"id":"https://openalex.org/C80444323","wikidata":"https://www.wikidata.org/wiki/Q2878974","display_name":"Theoretical computer science","level":1,"score":0.342970073223114},{"id":"https://openalex.org/C9390403","wikidata":"https://www.wikidata.org/wiki/Q3966","display_name":"Computer hardware","level":1,"score":0.2558513879776001}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3643682","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3643682","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3643682","source":{"id":"https://openalex.org/S26056741","display_name":"ACM Transactions on Architecture and Code Optimization","issn_l":"1544-3566","issn":["1544-3566","1544-3973"],"is_oa":true,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM Transactions on Architecture and Code Optimization","raw_type":"journal-article"}],"best_oa_location":{"id":"doi:10.1145/3643682","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3643682","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3643682","source":{"id":"https://openalex.org/S26056741","display_name":"ACM Transactions on Architecture and Code Optimization","issn_l":"1544-3566","issn":["1544-3566","1544-3973"],"is_oa":true,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM Transactions on Architecture and Code Optimization","raw_type":"journal-article"},"sustainable_development_goals":[{"score":0.8999999761581421,"display_name":"Affordable and clean energy","id":"https://metadata.un.org/sdg/7"}],"awards":[{"id":"https://openalex.org/G1034613892","display_name":null,"funder_award_id":"62372253, 62002175","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G4605561969","display_name":null,"funder_award_id":"CARCHB202016","funder_id":"https://openalex.org/F4320335561","funder_display_name":"Institute of Computing Technology, Chinese Academy of Sciences"},{"id":"https://openalex.org/G8653565265","display_name":null,"funder_award_id":"CARCHB202016","funder_id":"https://openalex.org/F4320321133","funder_display_name":"Chinese Academy of Sciences"}],"funders":[{"id":"https://openalex.org/F4320318547","display_name":"Baidu","ror":"https://ror.org/03vs3wt56"},{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"},{"id":"https://openalex.org/F4320321133","display_name":"Chinese Academy of Sciences","ror":"https://ror.org/034t30j35"},{"id":"https://openalex.org/F4320323993","display_name":"Natural Science Foundation of Tianjin City","ror":null},{"id":"https://openalex.org/F4320335561","display_name":"Institute of Computing Technology, Chinese Academy of Sciences","ror":null}],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4391387876.pdf","grobid_xml":"https://content.openalex.org/works/W4391387876.grobid-xml"},"referenced_works_count":37,"referenced_works":["https://openalex.org/W182691100","https://openalex.org/W1574447377","https://openalex.org/W2163027735","https://openalex.org/W2289252105","https://openalex.org/W2592875630","https://openalex.org/W2617247391","https://openalex.org/W2626616508","https://openalex.org/W2729080111","https://openalex.org/W2896983500","https://openalex.org/W2910323801","https://openalex.org/W2940399336","https://openalex.org/W2942063194","https://openalex.org/W2974514820","https://openalex.org/W2977634443","https://openalex.org/W3000207760","https://openalex.org/W3018105153","https://openalex.org/W3038838661","https://openalex.org/W3093577244","https://openalex.org/W3131926233","https://openalex.org/W3134012069","https://openalex.org/W3139203094","https://openalex.org/W3187908937","https://openalex.org/W3193859704","https://openalex.org/W3198679933","https://openalex.org/W3199348509","https://openalex.org/W3210447451","https://openalex.org/W4206230517","https://openalex.org/W4225426816","https://openalex.org/W4230841294","https://openalex.org/W4239088979","https://openalex.org/W4245602507","https://openalex.org/W4285056663","https://openalex.org/W4285113157","https://openalex.org/W4300865759","https://openalex.org/W4302296459","https://openalex.org/W4378573650","https://openalex.org/W6637151318"],"related_works":["https://openalex.org/W2111241003","https://openalex.org/W4200391368","https://openalex.org/W2210979487","https://openalex.org/W2074043759","https://openalex.org/W3042736233","https://openalex.org/W2082487009","https://openalex.org/W2373535795","https://openalex.org/W2406926880","https://openalex.org/W4237139544","https://openalex.org/W2405661381"],"abstract_inverted_index":{"Convolutional":[0],"Neural":[1],"Networks":[2],"(CNNs)":[3],"can":[4],"benefit":[5],"from":[6],"the":[7,12,22,45,60,92,95,167,217],"computational":[8],"reductions":[9],"provided":[10],"by":[11,179,197,208],"Winograd":[13,46,79,97,123,148,168,192],"minimal":[14],"filtering":[15],"algorithm":[16],"and":[17,34,68,110,122,150,202,205,211,219,231],"weight":[18,128,163],"pruning.":[19],"However,":[20],"harnessing":[21],"potential":[23],"of":[24,94,182,200],"both":[25,120],"methods":[26],"simultaneously":[27],"introduces":[28,83],"complexity":[29],"in":[30,44,65,73,166,184],"designing":[31],"pruning":[32,114,157],"algorithms":[33],"accelerators.":[35],"Prior":[36],"studies":[37],"aimed":[38],"to":[39,161],"establish":[40],"regular":[41],"sparsity":[42,61,118,165],"patterns":[43],"domain,":[47],"but":[48],"they":[49],"were":[50],"primarily":[51],"suited":[52],"for":[53,77,143],"small":[54],"tiles,":[55],"with":[56,146,189,216,224],"domain":[57,69,108,169],"transformation":[58,70,109],"dictating":[59],"ratio.":[62],"The":[63],"irregularities":[64],"data":[66],"access":[67],"pose":[71],"challenges":[72],"accelerator":[74,142,175,178,223],"design,":[75],"especially":[76],"larger":[78],"tiles.":[80],"This":[81],"paper":[82],"\u201cWinols,\u201d":[84],"an":[85,140,198],"innovative":[86],"algorithm-hardware":[87],"co-design":[88],"strategy":[89],"that":[90,116],"emphasizes":[91],"strengths":[93],"large-tiling":[96],"algorithm.":[98],"Through":[99],"a":[100,112,132,180],"spatial-to-Winograd":[101],"relevance":[102],"degree":[103],"evaluation,":[104],"we":[105,130],"extensively":[106],"explore":[107],"propose":[111],"cross-domain":[113],"technique":[115],"retains":[117],"across":[119],"spatial":[121],"domains.":[124],"To":[125],"compress":[126],"pruned":[127],"matrices,":[129],"invent":[131],"relative":[133],"column":[134],"encoding":[135],"scheme.":[136],"We":[137],"further":[138],"design":[139],"FPGA-based":[141],"CNN":[144],"models":[145],"large":[147],"tiles":[149],"sparse":[151,191],"matrix-vector":[152],"operations.":[153],"Evaluations":[154],"indicate":[155],"our":[156],"method":[158],"achieves":[159,229],"up":[160],"80%":[162],"tile":[164,225],"without":[170],"compromising":[171],"accuracy.":[172],"Our":[173],"Winols":[174,194,222],"outperforms":[176],"dense":[177],"factor":[181],"31.7\u00d7":[183],"inference":[185],"latency.":[186],"When":[187,214],"compared":[188,215],"prevailing":[190],"accelerators,":[193],"reduces":[195],"latency":[196],"average":[199],"10.9\u00d7,":[201],"improves":[203],"DSP":[204],"energy":[206,233],"efficiencies":[207],"over":[209],"5.6\u00d7":[210],"5.7\u00d7,":[212],"respectively.":[213,236],"CPU":[218],"GPU":[220],"platform,":[221],"size":[226],"8\u00d7":[227],"8":[228],"24.6\u00d7":[230],"2.84\u00d7":[232],"efficiency":[234],"improvements,":[235]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":6},{"year":2024,"cited_by_count":2}],"updated_date":"2026-05-21T09:19:25.381259","created_date":"2025-10-10T00:00:00"}
