{"id":"https://openalex.org/W2792503273","doi":"https://doi.org/10.1145/3174243.3174265","title":"A Framework for Generating High Throughput CNN Implementations on FPGAs","display_name":"A Framework for Generating High Throughput CNN Implementations on FPGAs","publication_year":2018,"publication_date":"2018-02-15","ids":{"openalex":"https://openalex.org/W2792503273","doi":"https://doi.org/10.1145/3174243.3174265","mag":"2792503273"},"language":"en","primary_location":{"id":"doi:10.1145/3174243.3174265","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3174243.3174265","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2018 ACM/SIGDA International Symposium on Field-Programmable Gate Arrays","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5014103790","display_name":"Hanqing Zeng","orcid":"https://orcid.org/0000-0002-2578-2147"},"institutions":[{"id":"https://openalex.org/I1174212","display_name":"University of Southern California","ror":"https://ror.org/03taz7m60","country_code":"US","type":"education","lineage":["https://openalex.org/I1174212"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Hanqing Zeng","raw_affiliation_strings":["University of Southern California, Los Angeles, CA, USA"],"affiliations":[{"raw_affiliation_string":"University of Southern California, Los Angeles, CA, USA","institution_ids":["https://openalex.org/I1174212"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5070056530","display_name":"Ren Chen","orcid":null},"institutions":[{"id":"https://openalex.org/I1174212","display_name":"University of Southern California","ror":"https://ror.org/03taz7m60","country_code":"US","type":"education","lineage":["https://openalex.org/I1174212"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Ren Chen","raw_affiliation_strings":["University of Southern California, Los Angeles, CA, USA"],"affiliations":[{"raw_affiliation_string":"University of Southern California, Los Angeles, CA, USA","institution_ids":["https://openalex.org/I1174212"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100458134","display_name":"Chi Zhang","orcid":"https://orcid.org/0000-0001-7374-1940"},"institutions":[{"id":"https://openalex.org/I1174212","display_name":"University of Southern California","ror":"https://ror.org/03taz7m60","country_code":"US","type":"education","lineage":["https://openalex.org/I1174212"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Chi Zhang","raw_affiliation_strings":["University of Southern California, Los Angeles, CA, USA"],"affiliations":[{"raw_affiliation_string":"University of Southern California, Los Angeles, CA, USA","institution_ids":["https://openalex.org/I1174212"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5033166029","display_name":"Viktor K. Prasanna","orcid":"https://orcid.org/0000-0002-1609-8589"},"institutions":[{"id":"https://openalex.org/I1174212","display_name":"University of Southern California","ror":"https://ror.org/03taz7m60","country_code":"US","type":"education","lineage":["https://openalex.org/I1174212"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Viktor Prasanna","raw_affiliation_strings":["University of Southern California, Los Angeles, CA, USA"],"affiliations":[{"raw_affiliation_string":"University of Southern California, Los Angeles, CA, USA","institution_ids":["https://openalex.org/I1174212"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5014103790"],"corresponding_institution_ids":["https://openalex.org/I1174212"],"apc_list":null,"apc_paid":null,"fwci":7.2076,"has_fulltext":false,"cited_by_count":95,"citation_normalized_percentile":{"value":0.97815685,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":96,"max":100},"biblio":{"volume":null,"issue":null,"first_page":"117","last_page":"126"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.9995999932289124,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.9995999932289124,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11992","display_name":"CCD and CMOS Imaging Sensors","score":0.9994000196456909,"subfield":{"id":"https://openalex.org/subfields/2208","display_name":"Electrical and Electronic Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10502","display_name":"Advanced Memory and Neural Computing","score":0.9987000226974487,"subfield":{"id":"https://openalex.org/subfields/2208","display_name":"Electrical and Electronic Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8486407995223999},{"id":"https://openalex.org/keywords/throughput","display_name":"Throughput","score":0.6742000579833984},{"id":"https://openalex.org/keywords/design-space-exploration","display_name":"Design space exploration","score":0.6241253018379211},{"id":"https://openalex.org/keywords/field-programmable-gate-array","display_name":"Field-programmable gate array","score":0.6163501143455505},{"id":"https://openalex.org/keywords/kernel","display_name":"Kernel (algebra)","score":0.5337193012237549},{"id":"https://openalex.org/keywords/computation","display_name":"Computation","score":0.5177866220474243},{"id":"https://openalex.org/keywords/memory-bandwidth","display_name":"Memory bandwidth","score":0.5082928538322449},{"id":"https://openalex.org/keywords/reduction","display_name":"Reduction (mathematics)","score":0.495898574590683},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.49291276931762695},{"id":"https://openalex.org/keywords/computer-engineering","display_name":"Computer engineering","score":0.43893197178840637},{"id":"https://openalex.org/keywords/fast-fourier-transform","display_name":"Fast Fourier transform","score":0.4154050350189209},{"id":"https://openalex.org/keywords/embedded-system","display_name":"Embedded system","score":0.3958284556865692},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.28028711676597595},{"id":"https://openalex.org/keywords/wireless","display_name":"Wireless","score":0.11227008700370789}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8486407995223999},{"id":"https://openalex.org/C157764524","wikidata":"https://www.wikidata.org/wiki/Q1383412","display_name":"Throughput","level":3,"score":0.6742000579833984},{"id":"https://openalex.org/C2776221188","wikidata":"https://www.wikidata.org/wiki/Q21072556","display_name":"Design space exploration","level":2,"score":0.6241253018379211},{"id":"https://openalex.org/C42935608","wikidata":"https://www.wikidata.org/wiki/Q190411","display_name":"Field-programmable gate array","level":2,"score":0.6163501143455505},{"id":"https://openalex.org/C74193536","wikidata":"https://www.wikidata.org/wiki/Q574844","display_name":"Kernel (algebra)","level":2,"score":0.5337193012237549},{"id":"https://openalex.org/C45374587","wikidata":"https://www.wikidata.org/wiki/Q12525525","display_name":"Computation","level":2,"score":0.5177866220474243},{"id":"https://openalex.org/C188045654","wikidata":"https://www.wikidata.org/wiki/Q17148339","display_name":"Memory bandwidth","level":2,"score":0.5082928538322449},{"id":"https://openalex.org/C111335779","wikidata":"https://www.wikidata.org/wiki/Q3454686","display_name":"Reduction (mathematics)","level":2,"score":0.495898574590683},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.49291276931762695},{"id":"https://openalex.org/C113775141","wikidata":"https://www.wikidata.org/wiki/Q428691","display_name":"Computer engineering","level":1,"score":0.43893197178840637},{"id":"https://openalex.org/C75172450","wikidata":"https://www.wikidata.org/wiki/Q623950","display_name":"Fast Fourier transform","level":2,"score":0.4154050350189209},{"id":"https://openalex.org/C149635348","wikidata":"https://www.wikidata.org/wiki/Q193040","display_name":"Embedded system","level":1,"score":0.3958284556865692},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.28028711676597595},{"id":"https://openalex.org/C555944384","wikidata":"https://www.wikidata.org/wiki/Q249","display_name":"Wireless","level":2,"score":0.11227008700370789},{"id":"https://openalex.org/C114614502","wikidata":"https://www.wikidata.org/wiki/Q76592","display_name":"Combinatorics","level":1,"score":0.0},{"id":"https://openalex.org/C76155785","wikidata":"https://www.wikidata.org/wiki/Q418","display_name":"Telecommunications","level":1,"score":0.0},{"id":"https://openalex.org/C2524010","wikidata":"https://www.wikidata.org/wiki/Q8087","display_name":"Geometry","level":1,"score":0.0},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3174243.3174265","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3174243.3174265","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2018 ACM/SIGDA International Symposium on Field-Programmable Gate Arrays","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"score":0.4699999988079071,"display_name":"Decent work and economic growth","id":"https://metadata.un.org/sdg/8"}],"awards":[],"funders":[{"id":"https://openalex.org/F4320306076","display_name":"National Science Foundation","ror":"https://ror.org/021nxhr62"}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":20,"referenced_works":["https://openalex.org/W1686810756","https://openalex.org/W1969057818","https://openalex.org/W2094756095","https://openalex.org/W2097117768","https://openalex.org/W2106805276","https://openalex.org/W2136952590","https://openalex.org/W2171656599","https://openalex.org/W2276486856","https://openalex.org/W2523838129","https://openalex.org/W2584311934","https://openalex.org/W2584616277","https://openalex.org/W2585774018","https://openalex.org/W2618530766","https://openalex.org/W2626712922","https://openalex.org/W2742152118","https://openalex.org/W2786845740","https://openalex.org/W2949245006","https://openalex.org/W2952632681","https://openalex.org/W4248003330","https://openalex.org/W4387269757"],"related_works":["https://openalex.org/W4313341326","https://openalex.org/W4282568311","https://openalex.org/W4313484792","https://openalex.org/W2951473296","https://openalex.org/W2883928845","https://openalex.org/W4288420200","https://openalex.org/W4285346947","https://openalex.org/W3145095675","https://openalex.org/W3205973659","https://openalex.org/W4365793791"],"abstract_inverted_index":{"We":[0,75,129,162,182,208],"propose":[1,76],"a":[2,27,35,54,77,107,132,179,200],"framework":[3,14,198,211],"to":[4,52,105,139,202,248],"generate":[5,204],"highly":[6],"efficient":[7,31],"accelerators":[8,216],"for":[9,20,30,37,119,153,217,240],"inferencing":[10],"on":[11,91,189,221],"FPGAs.":[12],"Our":[13,147,197],"consists":[15],"of":[16,46,57,232],"multiple":[17],"algorithmic":[18,42],"optimizations":[19,43],"computation":[21,71,90,117,193],"complexity":[22,118],"and":[23,34,114,125,174,194,237,243,251],"communication":[24,195],"volume":[25],"reduction,":[26],"mapping":[28,148],"methodology":[29,149],"resource":[32],"utilization,":[33],"tool":[36,201],"automatic":[38],"\\textttVerilog":[39],"generation.":[40],"The":[41,95],"improve":[44],"throughput":[45,142,215,231],"frequency":[47,134],"domain":[48,135],"convolution":[49],"so":[50],"as":[51],"satisfy":[53],"given":[55],"set":[56],"hardware":[58],"constraints.":[59],"While":[60],"the":[61,73,88,92,151,154,184,190,210,257],"Overlap-and-Add":[62],"(OaA)":[63],"technique":[64,138],"has":[65],"been":[66],"known,":[67],"it":[68],"performs":[69],"\"wasted\"":[70,89],"at":[72,111],"edges.":[74],"novel":[78,133],"Concatenate-and-Pad":[79],"(CaP)":[80],"technique,":[81],"which":[82],"improves":[83],"OaA":[84,102],"significantly":[85],"by":[86,143,157,166,212],"reducing":[87],"padded":[93],"pixels.":[94],"proposed":[96],"CaP":[97],"used":[98],"in":[99],"conjunction":[100],"with":[101,121,256],"enables":[103],"us":[104],"choose":[106],"fixed":[108],"FFT":[109],"size":[110,173],"design":[112,159],"time,":[113],"achieve":[115,230],"low":[116],"layers":[120],"various":[122],"image":[123],"sizes":[124],"kernel":[126],"window":[127],"sizes.":[128],"also":[130],"develop":[131],"loop":[136],"tiling":[137],"further":[140],"boost":[141],"improving":[144],"data":[145],"reuse.":[146],"optimizes":[150],"architecture":[152],"target":[155],"device":[156,180],"fast":[158],"space":[160],"exploration.":[161],"quantitatively":[163],"categorize":[164],"FPGAs":[165],"capturing":[167],"their":[168],"DSP":[169],"resources,":[170],"on-chip":[171],"memory":[172,176],"external":[175],"bandwidth":[177],"into":[178],"coefficient.":[181],"identify":[183],"optimal":[185],"architectural":[186],"parameters":[187],"based":[188],"tradeoff":[191],"between":[192],"cost.":[196],"includes":[199],"automatically":[203],"fully":[205],"synthesizable":[206],"\\textttVerilog.":[207],"demonstrate":[209],"generating":[213],"high":[214],"state-of-the-art":[218,258],"CNN":[219],"models":[220],"Intel":[222],"HARP":[223],"heterogeneous":[224],"platform.":[225],"Using":[226],"our":[227],"framework,":[228],"we":[229],"$780.6$":[233],"$GOPS$,":[234],"$669.1$":[235],"$GOPS$":[236,239],"$552.1$":[238],"AlexNet,":[241],"VGG16":[242],"FCN-16s":[244],"respectively.":[245],"These":[246],"correspond":[247],"$6.8\\times$":[249],"(AlexNet)":[250],"$4.9\\times$":[252],"(VGG16)":[253],"improvement":[254],"compared":[255],"implementations.":[259]},"counts_by_year":[{"year":2025,"cited_by_count":3},{"year":2024,"cited_by_count":3},{"year":2023,"cited_by_count":5},{"year":2022,"cited_by_count":15},{"year":2021,"cited_by_count":18},{"year":2020,"cited_by_count":18},{"year":2019,"cited_by_count":25},{"year":2018,"cited_by_count":8}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
