{"id":"https://openalex.org/W7131089028","doi":"https://doi.org/10.1109/tvlsi.2026.3663320","title":"Efficient FPGA Acceleration for 4-bit CNNs via Quantization-Induced Structured Sparsity and LUT-Based Multiplication","display_name":"Efficient FPGA Acceleration for 4-bit CNNs via Quantization-Induced Structured Sparsity and LUT-Based Multiplication","publication_year":2026,"publication_date":"2026-02-23","ids":{"openalex":"https://openalex.org/W7131089028","doi":"https://doi.org/10.1109/tvlsi.2026.3663320"},"language":null,"primary_location":{"id":"doi:10.1109/tvlsi.2026.3663320","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tvlsi.2026.3663320","pdf_url":null,"source":{"id":"https://openalex.org/S37538908","display_name":"IEEE Transactions on Very Large Scale Integration (VLSI) Systems","issn_l":"1063-8210","issn":["1063-8210","1557-9999"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Very Large Scale Integration (VLSI) Systems","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":null,"display_name":"Yuanmiao Lin","orcid":"https://orcid.org/0009-0005-9106-5989"},"institutions":[{"id":"https://openalex.org/I139024713","display_name":"Guangdong University of Technology","ror":"https://ror.org/04azbjn80","country_code":"CN","type":"education","lineage":["https://openalex.org/I139024713"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yuanmiao Lin","raw_affiliation_strings":["School of Integrated Circuits, Guangdong University of Technology, Guangzhou, China"],"raw_orcid":"https://orcid.org/0009-0005-9106-5989","affiliations":[{"raw_affiliation_string":"School of Integrated Circuits, Guangdong University of Technology, Guangzhou, China","institution_ids":["https://openalex.org/I139024713"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5126642285","display_name":"Peng Zhang","orcid":null},"institutions":[{"id":"https://openalex.org/I139024713","display_name":"Guangdong University of Technology","ror":"https://ror.org/04azbjn80","country_code":"CN","type":"education","lineage":["https://openalex.org/I139024713"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Peng Zhang","raw_affiliation_strings":["School of Integrated Circuits, Guangdong University of Technology, Guangzhou, China"],"raw_orcid":"https://orcid.org/0009-0009-4269-1958","affiliations":[{"raw_affiliation_string":"School of Integrated Circuits, Guangdong University of Technology, Guangzhou, China","institution_ids":["https://openalex.org/I139024713"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Zijian Zhang","orcid":"https://orcid.org/0009-0009-8804-2312"},"institutions":[{"id":"https://openalex.org/I139024713","display_name":"Guangdong University of Technology","ror":"https://ror.org/04azbjn80","country_code":"CN","type":"education","lineage":["https://openalex.org/I139024713"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Zijian Zhang","raw_affiliation_strings":["School of Integrated Circuits, Guangdong University of Technology, Guangzhou, China"],"raw_orcid":"https://orcid.org/0009-0009-8804-2312","affiliations":[{"raw_affiliation_string":"School of Integrated Circuits, Guangdong University of Technology, Guangzhou, China","institution_ids":["https://openalex.org/I139024713"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Xueming Li","orcid":"https://orcid.org/0000-0002-9700-4272"},"institutions":[{"id":"https://openalex.org/I139024713","display_name":"Guangdong University of Technology","ror":"https://ror.org/04azbjn80","country_code":"CN","type":"education","lineage":["https://openalex.org/I139024713"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xueming Li","raw_affiliation_strings":["School of Integrated Circuits, Guangdong University of Technology, Guangzhou, China"],"raw_orcid":"https://orcid.org/0000-0002-9700-4272","affiliations":[{"raw_affiliation_string":"School of Integrated Circuits, Guangdong University of Technology, Guangzhou, China","institution_ids":["https://openalex.org/I139024713"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5126633501","display_name":"Hongmin Huang","orcid":null},"institutions":[{"id":"https://openalex.org/I4210122543","display_name":"Guangdong Polytechnic Normal University","ror":"https://ror.org/02pcb5m77","country_code":"CN","type":"education","lineage":["https://openalex.org/I4210122543"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Hongmin Huang","raw_affiliation_strings":["School of Electronics and Information, Guangdong Polytechnic Normal University, Guangzhou, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"School of Electronics and Information, Guangdong Polytechnic Normal University, Guangzhou, China","institution_ids":["https://openalex.org/I4210122543"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5080905648","display_name":"Ruidian Zhan","orcid":"https://orcid.org/0000-0002-1918-3375"},"institutions":[{"id":"https://openalex.org/I139024713","display_name":"Guangdong University of Technology","ror":"https://ror.org/04azbjn80","country_code":"CN","type":"education","lineage":["https://openalex.org/I139024713"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Ruidian Zhan","raw_affiliation_strings":["School of Integrated Circuits, Guangdong University of Technology, Guangzhou, China"],"raw_orcid":"https://orcid.org/0000-0002-1918-3375","affiliations":[{"raw_affiliation_string":"School of Integrated Circuits, Guangdong University of Technology, Guangzhou, China","institution_ids":["https://openalex.org/I139024713"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5002676085","display_name":"Xianghong Hu","orcid":"https://orcid.org/0000-0002-1237-4945"},"institutions":[{"id":"https://openalex.org/I139024713","display_name":"Guangdong University of Technology","ror":"https://ror.org/04azbjn80","country_code":"CN","type":"education","lineage":["https://openalex.org/I139024713"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xianghong Hu","raw_affiliation_strings":["School of Integrated Circuits, Guangdong University of Technology, Guangzhou, China"],"raw_orcid":"https://orcid.org/0000-0002-1237-4945","affiliations":[{"raw_affiliation_string":"School of Integrated Circuits, Guangdong University of Technology, Guangzhou, China","institution_ids":["https://openalex.org/I139024713"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5121707908","display_name":"Shuting Cai","orcid":null},"institutions":[{"id":"https://openalex.org/I139024713","display_name":"Guangdong University of Technology","ror":"https://ror.org/04azbjn80","country_code":"CN","type":"education","lineage":["https://openalex.org/I139024713"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Shuting Cai","raw_affiliation_strings":["School of Integrated Circuits, Guangdong University of Technology, Guangzhou, China"],"raw_orcid":"https://orcid.org/0000-0002-2842-6439","affiliations":[{"raw_affiliation_string":"School of Integrated Circuits, Guangdong University of Technology, Guangzhou, China","institution_ids":["https://openalex.org/I139024713"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5126627664","display_name":"Xiaoming Xiong","orcid":null},"institutions":[{"id":"https://openalex.org/I139024713","display_name":"Guangdong University of Technology","ror":"https://ror.org/04azbjn80","country_code":"CN","type":"education","lineage":["https://openalex.org/I139024713"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xiaoming Xiong","raw_affiliation_strings":["School of Integrated Circuits, Guangdong University of Technology, Guangzhou, China"],"raw_orcid":"https://orcid.org/0000-0002-2421-7621","affiliations":[{"raw_affiliation_string":"School of Integrated Circuits, Guangdong University of Technology, Guangzhou, China","institution_ids":["https://openalex.org/I139024713"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":9,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.28621581,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":"34","issue":"5","first_page":"1677","last_page":"1681"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T13182","display_name":"Quantum-Dot Cellular Automata","score":0.1453000009059906,"subfield":{"id":"https://openalex.org/subfields/1703","display_name":"Computational Theory and Mathematics"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T13182","display_name":"Quantum-Dot Cellular Automata","score":0.1453000009059906,"subfield":{"id":"https://openalex.org/subfields/1703","display_name":"Computational Theory and Mathematics"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11693","display_name":"Cryptography and Residue Arithmetic","score":0.10819999873638153,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11522","display_name":"VLSI and FPGA Design Techniques","score":0.08940000087022781,"subfield":{"id":"https://openalex.org/subfields/2208","display_name":"Electrical and Electronic Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/field-programmable-gate-array","display_name":"Field-programmable gate array","score":0.7436000108718872},{"id":"https://openalex.org/keywords/lookup-table","display_name":"Lookup table","score":0.6607999801635742},{"id":"https://openalex.org/keywords/quantization","display_name":"Quantization (signal processing)","score":0.6431999802589417},{"id":"https://openalex.org/keywords/multiplier","display_name":"Multiplier (economics)","score":0.576200008392334},{"id":"https://openalex.org/keywords/convolutional-neural-network","display_name":"Convolutional neural network","score":0.5673999786376953},{"id":"https://openalex.org/keywords/overhead","display_name":"Overhead (engineering)","score":0.4864000082015991},{"id":"https://openalex.org/keywords/key","display_name":"Key (lock)","score":0.4652000069618225},{"id":"https://openalex.org/keywords/artificial-neural-network","display_name":"Artificial neural network","score":0.40860000252723694}],"concepts":[{"id":"https://openalex.org/C42935608","wikidata":"https://www.wikidata.org/wiki/Q190411","display_name":"Field-programmable gate array","level":2,"score":0.7436000108718872},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7390000224113464},{"id":"https://openalex.org/C134835016","wikidata":"https://www.wikidata.org/wiki/Q690265","display_name":"Lookup table","level":2,"score":0.6607999801635742},{"id":"https://openalex.org/C28855332","wikidata":"https://www.wikidata.org/wiki/Q198099","display_name":"Quantization (signal processing)","level":2,"score":0.6431999802589417},{"id":"https://openalex.org/C124584101","wikidata":"https://www.wikidata.org/wiki/Q1053266","display_name":"Multiplier (economics)","level":2,"score":0.576200008392334},{"id":"https://openalex.org/C81363708","wikidata":"https://www.wikidata.org/wiki/Q17084460","display_name":"Convolutional neural network","level":2,"score":0.5673999786376953},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.49630001187324524},{"id":"https://openalex.org/C2779960059","wikidata":"https://www.wikidata.org/wiki/Q7113681","display_name":"Overhead (engineering)","level":2,"score":0.4864000082015991},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.4652000069618225},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.40860000252723694},{"id":"https://openalex.org/C9390403","wikidata":"https://www.wikidata.org/wiki/Q3966","display_name":"Computer hardware","level":1,"score":0.357699990272522},{"id":"https://openalex.org/C45235069","wikidata":"https://www.wikidata.org/wiki/Q278425","display_name":"Table (database)","level":2,"score":0.34279999136924744},{"id":"https://openalex.org/C78548338","wikidata":"https://www.wikidata.org/wiki/Q2493","display_name":"Data compression","level":2,"score":0.34060001373291016},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.334199994802475},{"id":"https://openalex.org/C26713055","wikidata":"https://www.wikidata.org/wiki/Q245962","display_name":"Implementation","level":2,"score":0.3276999890804291},{"id":"https://openalex.org/C13164978","wikidata":"https://www.wikidata.org/wiki/Q600158","display_name":"Hardware acceleration","level":3,"score":0.31949999928474426},{"id":"https://openalex.org/C164620267","wikidata":"https://www.wikidata.org/wiki/Q376953","display_name":"Adder","level":3,"score":0.31299999356269836},{"id":"https://openalex.org/C106516650","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm design","level":2,"score":0.28600001335144043},{"id":"https://openalex.org/C94375191","wikidata":"https://www.wikidata.org/wiki/Q11205","display_name":"Arithmetic","level":1,"score":0.2718000113964081},{"id":"https://openalex.org/C2780595030","wikidata":"https://www.wikidata.org/wiki/Q3860309","display_name":"Multiplication (music)","level":2,"score":0.2711000144481659},{"id":"https://openalex.org/C68339613","wikidata":"https://www.wikidata.org/wiki/Q1549489","display_name":"Speedup","level":2,"score":0.26980000734329224},{"id":"https://openalex.org/C131017901","wikidata":"https://www.wikidata.org/wiki/Q170451","display_name":"Logic gate","level":2,"score":0.26969999074935913},{"id":"https://openalex.org/C77390884","wikidata":"https://www.wikidata.org/wiki/Q217302","display_name":"Application-specific integrated circuit","level":2,"score":0.2648000121116638},{"id":"https://openalex.org/C113775141","wikidata":"https://www.wikidata.org/wiki/Q428691","display_name":"Computer engineering","level":1,"score":0.26170000433921814},{"id":"https://openalex.org/C117896860","wikidata":"https://www.wikidata.org/wiki/Q11376","display_name":"Acceleration","level":2,"score":0.25859999656677246},{"id":"https://openalex.org/C125411270","wikidata":"https://www.wikidata.org/wiki/Q18653","display_name":"Encoding (memory)","level":2,"score":0.2502000033855438}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/tvlsi.2026.3663320","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tvlsi.2026.3663320","pdf_url":null,"source":{"id":"https://openalex.org/S37538908","display_name":"IEEE Transactions on Very Large Scale Integration (VLSI) Systems","issn_l":"1063-8210","issn":["1063-8210","1557-9999"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Very Large Scale Integration (VLSI) Systems","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/7","score":0.545375406742096,"display_name":"Affordable and clean energy"}],"awards":[{"id":"https://openalex.org/G1467150770","display_name":null,"funder_award_id":"2023B01J0007","funder_id":"https://openalex.org/F4320327796","funder_display_name":"Science and Technology Planning Project of Fuzhou"},{"id":"https://openalex.org/G6869640974","display_name":null,"funder_award_id":"62301165","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"},{"id":"https://openalex.org/F4320327796","display_name":"Science and Technology Planning Project of Fuzhou","ror":null}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":14,"referenced_works":["https://openalex.org/W3158020960","https://openalex.org/W3180306599","https://openalex.org/W3185702163","https://openalex.org/W4312857960","https://openalex.org/W4321637212","https://openalex.org/W4387623777","https://openalex.org/W4390771525","https://openalex.org/W4405518161","https://openalex.org/W4405520952","https://openalex.org/W4409248726","https://openalex.org/W4410343044","https://openalex.org/W4410537662","https://openalex.org/W4411726645","https://openalex.org/W4416429913"],"related_works":[],"abstract_inverted_index":{"N:M":[0,31,45,101,120],"structured":[1,78],"sparsity":[2,32,46,102,121],"is":[3],"key":[4],"to":[5,138,141],"convolutional":[6],"neural":[7],"network":[8],"(CNN)":[9],"compression":[10],"and":[11,76,199],"acceleration,":[12],"but":[13],"two":[14],"challenges":[15],"remain.":[16],"From":[17,37],"the":[18,38,41,110,128,196,200],"algorithm":[19,97,198],"perspective,":[20,40],"prior":[21],"works":[22],"have":[23,47],"mainly":[24],"focused":[25],"on":[26,161],"8-bit":[27],"quantized":[28],"models,":[29],"where":[30],"yields":[33],"limited":[34],"hardware":[35,39,111],"efficiency.":[36],"cost":[42,135],"differences":[43],"across":[44],"not":[48],"been":[49],"analyzed.":[50],"To":[51],"address":[52],"these":[53],"issues,":[54],"we":[55,144],"present":[56],"a":[57,94,146,162],"unified":[58],"algorithm\u2013hardware":[59],"co-design":[60],"framework":[61],"for":[62],"4-bit":[63,69,147],"CNN":[64],"acceleration.":[65],"We":[66,92,107],"show":[67],"that":[68,98,127],"quantization":[70],"induces":[71],"over":[72,81,190],"80%":[73],"zero":[74],"weights":[75],"strongly":[77],"sparsity,":[79],"with":[80,103],"95%":[82],"of":[83,113,195,202],"weight":[84],"groups":[85],"satisfying":[86],"4:8,":[87],"8:16,":[88,124],"or":[89],"16:32":[90],"patterns.":[91],"propose":[93],"pruning-after-quantization":[95],"(PAQ)":[96],"enforces":[99],"strict":[100],"minimal":[104],"accuracy":[105],"loss.":[106],"also":[108],"analyze":[109],"overhead":[112],"activation":[114],"fetch":[115],"units":[116],"(AFUs)":[117],"under":[118],"different":[119],"patterns":[122],"(4:8,":[123],"16:32),":[125],"revealing":[126],"4:8":[129],"AFU":[130],"reduces":[131],"look-up":[132],"table":[133],"(LUT)":[134],"by":[136],"up":[137],"66.7%":[139],"compared":[140],"16:32.":[142],"Finally,":[143],"introduce":[145],"LUT-based":[148],"sign-magnitude":[149],"multiplier":[150],"(LBSMM)":[151],"requiring":[152],"only":[153],"11":[154],"LUT6":[155],"resources,":[156],"outperforming":[157],"existing":[158],"multipliers.":[159],"Integrated":[160],"Xilinx":[163],"VCU118":[164],"field-programmable":[165],"gate":[166],"array":[167],"(FPGA),":[168],"our":[169],"accelerator":[170],"achieves":[171],"<inline-formula":[172],"xmlns:mml=\"http://www.w3.org/1998/Math/MathML\"":[173,179,208],"xmlns:xlink=\"http://www.w3.org/1999/xlink\">":[174,180],"<tex-math":[175,181],"notation=\"LaTeX\">$2.51\\times":[176],"$</tex-math>":[177,183],"</inline-formula>\u2013<inline-formula":[178],"notation=\"LaTeX\">$12.89\\times":[182],"</inline-formula>":[184],"improvements":[185],"in":[186],"equivalent":[187],"LUT":[188],"efficiency":[189],"SOTA":[191],"designs.":[192],"The":[193],"implementations":[194],"PAQ":[197],"RTL":[201],"LBSMM":[203],"are":[204],"available":[205],"at":[206],"<uri":[207],"xmlns:xlink=\"http://www.w3.org/1999/xlink\">https://github.com/haden-01/PAQ-and-LBSMM.git</uri>":[209]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2026-02-24T00:00:00"}
