{"id":"https://openalex.org/W4211085526","doi":"https://doi.org/10.1145/3490422.3502364","title":"FILM-QNN: Efficient FPGA Acceleration of Deep Neural Networks with Intra-Layer, Mixed-Precision Quantization","display_name":"FILM-QNN: Efficient FPGA Acceleration of Deep Neural Networks with Intra-Layer, Mixed-Precision Quantization","publication_year":2022,"publication_date":"2022-02-11","ids":{"openalex":"https://openalex.org/W4211085526","doi":"https://doi.org/10.1145/3490422.3502364"},"language":"en","primary_location":{"id":"doi:10.1145/3490422.3502364","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3490422.3502364","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2022 ACM/SIGDA International Symposium on Field-Programmable Gate Arrays","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5083232044","display_name":"Mengshu Sun","orcid":"https://orcid.org/0000-0003-3540-1464"},"institutions":[{"id":"https://openalex.org/I12912129","display_name":"Northeastern University","ror":"https://ror.org/04t5xt781","country_code":"US","type":"education","lineage":["https://openalex.org/I12912129"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Mengshu Sun","raw_affiliation_strings":["Northeastern University, Boston, MA, USA"],"affiliations":[{"raw_affiliation_string":"Northeastern University, Boston, MA, USA","institution_ids":["https://openalex.org/I12912129"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101633365","display_name":"Zhengang Li","orcid":"https://orcid.org/0000-0001-6644-4761"},"institutions":[{"id":"https://openalex.org/I12912129","display_name":"Northeastern University","ror":"https://ror.org/04t5xt781","country_code":"US","type":"education","lineage":["https://openalex.org/I12912129"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Zhengang Li","raw_affiliation_strings":["Northeastern University, Boston, MA, USA"],"affiliations":[{"raw_affiliation_string":"Northeastern University, Boston, MA, USA","institution_ids":["https://openalex.org/I12912129"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5067662998","display_name":"Alec Lu","orcid":"https://orcid.org/0000-0002-3315-7368"},"institutions":[{"id":"https://openalex.org/I18014758","display_name":"Simon Fraser University","ror":"https://ror.org/0213rcc28","country_code":"CA","type":"education","lineage":["https://openalex.org/I18014758"]}],"countries":["CA"],"is_corresponding":false,"raw_author_name":"Alec Lu","raw_affiliation_strings":["Simon Fraser University, Burnaby, Canada"],"affiliations":[{"raw_affiliation_string":"Simon Fraser University, Burnaby, Canada","institution_ids":["https://openalex.org/I18014758"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5010773732","display_name":"Yanyu Li","orcid":"https://orcid.org/0000-0003-1240-4785"},"institutions":[{"id":"https://openalex.org/I12912129","display_name":"Northeastern University","ror":"https://ror.org/04t5xt781","country_code":"US","type":"education","lineage":["https://openalex.org/I12912129"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Yanyu Li","raw_affiliation_strings":["Northeastern University, Boston, MA, USA"],"affiliations":[{"raw_affiliation_string":"Northeastern University, Boston, MA, USA","institution_ids":["https://openalex.org/I12912129"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5079385080","display_name":"Sung-En Chang","orcid":"https://orcid.org/0000-0001-8585-503X"},"institutions":[{"id":"https://openalex.org/I12912129","display_name":"Northeastern University","ror":"https://ror.org/04t5xt781","country_code":"US","type":"education","lineage":["https://openalex.org/I12912129"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Sung-En Chang","raw_affiliation_strings":["Northeastern University, Boston, MA, USA"],"affiliations":[{"raw_affiliation_string":"Northeastern University, Boston, MA, USA","institution_ids":["https://openalex.org/I12912129"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5016070401","display_name":"Xiaolong Ma","orcid":"https://orcid.org/0000-0003-3753-7648"},"institutions":[{"id":"https://openalex.org/I12912129","display_name":"Northeastern University","ror":"https://ror.org/04t5xt781","country_code":"US","type":"education","lineage":["https://openalex.org/I12912129"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Xiaolong Ma","raw_affiliation_strings":["Northeastern University, Boston, MA, USA"],"affiliations":[{"raw_affiliation_string":"Northeastern University, Boston, MA, USA","institution_ids":["https://openalex.org/I12912129"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5043582832","display_name":"Xue Lin","orcid":"https://orcid.org/0000-0001-6210-8883"},"institutions":[{"id":"https://openalex.org/I12912129","display_name":"Northeastern University","ror":"https://ror.org/04t5xt781","country_code":"US","type":"education","lineage":["https://openalex.org/I12912129"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Xue Lin","raw_affiliation_strings":["Northeastern University, Boston, MA, USA"],"affiliations":[{"raw_affiliation_string":"Northeastern University, Boston, MA, USA","institution_ids":["https://openalex.org/I12912129"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5065889904","display_name":"Zhenman Fang","orcid":"https://orcid.org/0000-0003-0603-9697"},"institutions":[{"id":"https://openalex.org/I18014758","display_name":"Simon Fraser University","ror":"https://ror.org/0213rcc28","country_code":"CA","type":"education","lineage":["https://openalex.org/I18014758"]}],"countries":["CA"],"is_corresponding":false,"raw_author_name":"Zhenman Fang","raw_affiliation_strings":["Simon Fraser University, Burnaby, Canada"],"affiliations":[{"raw_affiliation_string":"Simon Fraser University, Burnaby, Canada","institution_ids":["https://openalex.org/I18014758"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":8,"corresponding_author_ids":["https://openalex.org/A5083232044"],"corresponding_institution_ids":["https://openalex.org/I12912129"],"apc_list":null,"apc_paid":null,"fwci":7.9469,"has_fulltext":false,"cited_by_count":81,"citation_normalized_percentile":{"value":0.98342744,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":98,"max":100},"biblio":{"volume":null,"issue":null,"first_page":"134","last_page":"145"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11992","display_name":"CCD and CMOS Imaging Sensors","score":0.9972000122070312,"subfield":{"id":"https://openalex.org/subfields/2208","display_name":"Electrical and Electronic Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10627","display_name":"Advanced Image and Video Retrieval Techniques","score":0.9970999956130981,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8351311683654785},{"id":"https://openalex.org/keywords/field-programmable-gate-array","display_name":"Field-programmable gate array","score":0.7438063621520996},{"id":"https://openalex.org/keywords/quantization","display_name":"Quantization (signal processing)","score":0.7139400243759155},{"id":"https://openalex.org/keywords/speedup","display_name":"Speedup","score":0.6312556266784668},{"id":"https://openalex.org/keywords/computation","display_name":"Computation","score":0.6183607578277588},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.5477877259254456},{"id":"https://openalex.org/keywords/stratix","display_name":"Stratix","score":0.5409994125366211},{"id":"https://openalex.org/keywords/computer-engineering","display_name":"Computer engineering","score":0.4696490168571472},{"id":"https://openalex.org/keywords/edge-device","display_name":"Edge device","score":0.45621198415756226},{"id":"https://openalex.org/keywords/throughput","display_name":"Throughput","score":0.4521971344947815},{"id":"https://openalex.org/keywords/artificial-neural-network","display_name":"Artificial neural network","score":0.44817766547203064},{"id":"https://openalex.org/keywords/hardware-acceleration","display_name":"Hardware acceleration","score":0.41548168659210205},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.3743259906768799},{"id":"https://openalex.org/keywords/embedded-system","display_name":"Embedded system","score":0.3018970489501953},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.24144017696380615}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8351311683654785},{"id":"https://openalex.org/C42935608","wikidata":"https://www.wikidata.org/wiki/Q190411","display_name":"Field-programmable gate array","level":2,"score":0.7438063621520996},{"id":"https://openalex.org/C28855332","wikidata":"https://www.wikidata.org/wiki/Q198099","display_name":"Quantization (signal processing)","level":2,"score":0.7139400243759155},{"id":"https://openalex.org/C68339613","wikidata":"https://www.wikidata.org/wiki/Q1549489","display_name":"Speedup","level":2,"score":0.6312556266784668},{"id":"https://openalex.org/C45374587","wikidata":"https://www.wikidata.org/wiki/Q12525525","display_name":"Computation","level":2,"score":0.6183607578277588},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.5477877259254456},{"id":"https://openalex.org/C2776277307","wikidata":"https://www.wikidata.org/wiki/Q22074755","display_name":"Stratix","level":3,"score":0.5409994125366211},{"id":"https://openalex.org/C113775141","wikidata":"https://www.wikidata.org/wiki/Q428691","display_name":"Computer engineering","level":1,"score":0.4696490168571472},{"id":"https://openalex.org/C138236772","wikidata":"https://www.wikidata.org/wiki/Q25098575","display_name":"Edge device","level":3,"score":0.45621198415756226},{"id":"https://openalex.org/C157764524","wikidata":"https://www.wikidata.org/wiki/Q1383412","display_name":"Throughput","level":3,"score":0.4521971344947815},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.44817766547203064},{"id":"https://openalex.org/C13164978","wikidata":"https://www.wikidata.org/wiki/Q600158","display_name":"Hardware acceleration","level":3,"score":0.41548168659210205},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.3743259906768799},{"id":"https://openalex.org/C149635348","wikidata":"https://www.wikidata.org/wiki/Q193040","display_name":"Embedded system","level":1,"score":0.3018970489501953},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.24144017696380615},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0},{"id":"https://openalex.org/C76155785","wikidata":"https://www.wikidata.org/wiki/Q418","display_name":"Telecommunications","level":1,"score":0.0},{"id":"https://openalex.org/C79974875","wikidata":"https://www.wikidata.org/wiki/Q483639","display_name":"Cloud computing","level":2,"score":0.0},{"id":"https://openalex.org/C555944384","wikidata":"https://www.wikidata.org/wiki/Q249","display_name":"Wireless","level":2,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3490422.3502364","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3490422.3502364","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2022 ACM/SIGDA International Symposium on Field-Programmable Gate Arrays","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/7","display_name":"Affordable and clean energy","score":0.6100000143051147}],"awards":[{"id":"https://openalex.org/G8912281322","display_name":null,"funder_award_id":"CCF-1901378","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"}],"funders":[{"id":"https://openalex.org/F4320306076","display_name":"National Science Foundation","ror":"https://ror.org/021nxhr62"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":33,"referenced_works":["https://openalex.org/W569478347","https://openalex.org/W2094756095","https://openalex.org/W2300242332","https://openalex.org/W2565125333","https://openalex.org/W2585560244","https://openalex.org/W2593390416","https://openalex.org/W2612090114","https://openalex.org/W2613938013","https://openalex.org/W2615208831","https://openalex.org/W2616014673","https://openalex.org/W2739789140","https://openalex.org/W2761757071","https://openalex.org/W2762374354","https://openalex.org/W2884150179","https://openalex.org/W2887782013","https://openalex.org/W2899915146","https://openalex.org/W2901214301","https://openalex.org/W2904282303","https://openalex.org/W2943296352","https://openalex.org/W2962728029","https://openalex.org/W2962761403","https://openalex.org/W2962818002","https://openalex.org/W2963367920","https://openalex.org/W2963480671","https://openalex.org/W2982041622","https://openalex.org/W2982644126","https://openalex.org/W2998183051","https://openalex.org/W3004061291","https://openalex.org/W3101086546","https://openalex.org/W3132370594","https://openalex.org/W3158020960","https://openalex.org/W4247198796","https://openalex.org/W4366658060"],"related_works":["https://openalex.org/W1988651200","https://openalex.org/W2532502681","https://openalex.org/W2518118925","https://openalex.org/W2991370896","https://openalex.org/W3046471834","https://openalex.org/W4253069360","https://openalex.org/W2765235648","https://openalex.org/W3176282186","https://openalex.org/W4319952061","https://openalex.org/W2789072850"],"abstract_inverted_index":{"With":[0],"the":[1,54,61,90,101,120,136,155,159,171,192,203,232,259,263,281],"trend":[2],"to":[3,23,75,153,169,190,201],"deploy":[4],"Deep":[5],"Neural":[6],"Network":[7],"(DNN)":[8],"inference":[9,63],"models":[10,81],"on":[11,215,278],"edge":[12],"devices":[13],"with":[14,119,158,234],"limited":[15],"resources,":[16],"quantization":[17,34,37,94],"techniques":[18,134],"have":[19],"been":[20],"widely":[21],"used":[22],"reduce":[24],"on-chip":[25,186],"storage":[26,187],"and":[27,60,77,110,125,150,178,185,218,228,241,246,256,268,275],"improve":[28,202],"computation":[29,58,175],"throughput.":[30],"However,":[31],"existing":[32],"DNN":[33,80],"work":[35],"deploying":[36],"below":[38],"8-bit":[39,244,264],"may":[40],"be":[41],"either":[42],"suffering":[43],"from":[44,115],"evident":[45],"accuracy":[46,124,253],"loss":[47],"or":[48],"facing":[49],"a":[50,70,163],"big":[51],"gap":[52],"between":[53],"theoretical":[55],"improvement":[56],"of":[57,103,122,142,173,205,225,238,243],"throughput":[59,157,270],"practical":[62],"speedup.":[64],"In":[65],"this":[66],"work,":[67],"we":[68,88,130,207],"propose":[69,89],"general":[71],"framework,":[72],"called":[73],"FILM-QNN,":[74,206],"quantize":[76],"accelerate":[78,191],"multiple":[79,132],"across":[82],"different":[83,98],"embedded":[84],"FPGA":[85,137,174,220],"devices.":[86],"First,":[87],"novel":[91],"intra-layer,":[92,235],"mixed-precision":[93,236],"algorithm":[95],"that":[96,231],"assigns":[97],"precisions":[99,196],"onto":[100],"filters":[102],"each":[104,198],"layer.":[105,199],"The":[106],"candidate":[107],"precision":[108],"levels":[109],"assignment":[111],"granularity":[112],"are":[113],"determined":[114],"our":[116],"empirical":[117],"study":[118],"capability":[121],"preserving":[123],"improving":[126],"hardware":[127],"parallelism.":[128],"Second,":[129],"apply":[131],"optimization":[133],"for":[135,258],"accelerator":[138],"architecture":[139],"in":[140,194],"support":[141],"quantized":[143],"computations,":[144],"including":[145],"DSP":[146],"packing,":[147,152],"weight":[148],"reordering,":[149],"data":[151,183],"enhance":[154],"overall":[156],"available":[160],"resources.":[161],"Moreover,":[162],"comprehensive":[164],"resource":[165],"model":[166],"is":[167],"developed":[168],"balance":[170],"allocation":[172],"resources":[176,188],"(LUTs":[177],"DSPs)":[179],"as":[180,182,262,280],"well":[181],"transfer":[184],"(BRAMs)":[189],"computations":[193],"mixed":[195],"within":[197],"Finally,":[200],"portability":[204],"implement":[208],"it":[209],"using":[210],"Vivado":[211],"High-Level":[212],"Synthesis":[213],"(HLS)":[214],"Xilinx":[216],"PYNQ-Z2":[217],"ZCU102":[219],"boards.":[221],"Our":[222],"experimental":[223],"results":[224],"ResNet-18,":[226],"ResNet-50,":[227],"MobileNet-V2":[229],"demonstrate":[230],"implementations":[233],"(95%":[237],"4-bit":[239,282],"weights":[240],"5%":[242],"weights,":[245],"all":[247],"5-bit":[248],"activations)":[249],"can":[250],"achieve":[251],"comparable":[252,269],"(70.47%,":[254],"77.25%,":[255],"65.67%":[257],"three":[260],"models)":[261],"(and":[265],"32-bit)":[266],"versions":[267],"(214.8":[271],"FPS,":[272,274],"109.1":[273],"537.9":[276],"FPS":[277],"ZCU102)":[279],"designs.":[283]},"counts_by_year":[{"year":2026,"cited_by_count":3},{"year":2025,"cited_by_count":30},{"year":2024,"cited_by_count":28},{"year":2023,"cited_by_count":14},{"year":2022,"cited_by_count":6}],"updated_date":"2026-03-28T08:17:26.163206","created_date":"2025-10-10T00:00:00"}
