{"id":"https://openalex.org/W4319870586","doi":"https://doi.org/10.1145/3543622.3573171","title":"Single-Batch CNN Training using Block Minifloats on FPGAs","display_name":"Single-Batch CNN Training using Block Minifloats on FPGAs","publication_year":2023,"publication_date":"2023-02-10","ids":{"openalex":"https://openalex.org/W4319870586","doi":"https://doi.org/10.1145/3543622.3573171"},"language":"en","primary_location":{"id":"doi:10.1145/3543622.3573171","is_oa":false,"landing_page_url":"http://dx.doi.org/10.1145/3543622.3573171","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2023 ACM/SIGDA International Symposium on Field Programmable Gate Arrays","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5058156841","display_name":"Chuliang Guo","orcid":"https://orcid.org/0000-0001-7403-0163"},"institutions":[{"id":"https://openalex.org/I129604602","display_name":"University of Sydney","ror":"https://ror.org/0384j8v12","country_code":"AU","type":"education","lineage":["https://openalex.org/I129604602"]}],"countries":["AU"],"is_corresponding":true,"raw_author_name":"Chuliang Guo","raw_affiliation_strings":["The University of Sydney, Sydney, NSW, Australia"],"affiliations":[{"raw_affiliation_string":"The University of Sydney, Sydney, NSW, Australia","institution_ids":["https://openalex.org/I129604602"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5060393103","display_name":"Binglei Lou","orcid":"https://orcid.org/0000-0003-4662-1892"},"institutions":[{"id":"https://openalex.org/I129604602","display_name":"University of Sydney","ror":"https://ror.org/0384j8v12","country_code":"AU","type":"education","lineage":["https://openalex.org/I129604602"]}],"countries":["AU"],"is_corresponding":false,"raw_author_name":"Binglei Lou","raw_affiliation_strings":["The University of Sydney, Sydney, NSW, Australia"],"affiliations":[{"raw_affiliation_string":"The University of Sydney, Sydney, NSW, Australia","institution_ids":["https://openalex.org/I129604602"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5085173772","display_name":"Xueyuan Liu","orcid":"https://orcid.org/0000-0001-5397-3066"},"institutions":[{"id":"https://openalex.org/I129604602","display_name":"University of Sydney","ror":"https://ror.org/0384j8v12","country_code":"AU","type":"education","lineage":["https://openalex.org/I129604602"]}],"countries":["AU"],"is_corresponding":false,"raw_author_name":"Xueyuan Liu","raw_affiliation_strings":["The University of Sydney, Sydney, NSW, Australia"],"affiliations":[{"raw_affiliation_string":"The University of Sydney, Sydney, NSW, Australia","institution_ids":["https://openalex.org/I129604602"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5040792997","display_name":"David Boland","orcid":"https://orcid.org/0000-0001-5370-4464"},"institutions":[{"id":"https://openalex.org/I129604602","display_name":"University of Sydney","ror":"https://ror.org/0384j8v12","country_code":"AU","type":"education","lineage":["https://openalex.org/I129604602"]}],"countries":["AU"],"is_corresponding":false,"raw_author_name":"David Boland","raw_affiliation_strings":["The University of Sydney, Sydney, NSW, Australia"],"affiliations":[{"raw_affiliation_string":"The University of Sydney, Sydney, NSW, Australia","institution_ids":["https://openalex.org/I129604602"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5107994859","display_name":"Philip H. W. Leong","orcid":"https://orcid.org/0000-0002-3923-3499"},"institutions":[{"id":"https://openalex.org/I129604602","display_name":"University of Sydney","ror":"https://ror.org/0384j8v12","country_code":"AU","type":"education","lineage":["https://openalex.org/I129604602"]}],"countries":["AU"],"is_corresponding":false,"raw_author_name":"Philip H.W. Leong","raw_affiliation_strings":["The University of Sydney, Sydney, NSW, Australia"],"affiliations":[{"raw_affiliation_string":"The University of Sydney, Sydney, NSW, Australia","institution_ids":["https://openalex.org/I129604602"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5058156841"],"corresponding_institution_ids":["https://openalex.org/I129604602"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.00803255,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"53","last_page":"53"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12676","display_name":"Machine Learning and ELM","score":0.986299991607666,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11689","display_name":"Adversarial Robustness in Machine Learning","score":0.9821000099182129,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8092639446258545},{"id":"https://openalex.org/keywords/field-programmable-gate-array","display_name":"Field-programmable gate array","score":0.6871846914291382},{"id":"https://openalex.org/keywords/block","display_name":"Block (permutation group theory)","score":0.6020540595054626},{"id":"https://openalex.org/keywords/throughput","display_name":"Throughput","score":0.5338066220283508},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.5066131353378296},{"id":"https://openalex.org/keywords/computation","display_name":"Computation","score":0.49803829193115234},{"id":"https://openalex.org/keywords/convolutional-neural-network","display_name":"Convolutional neural network","score":0.4709010422229767},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.45424872636795044},{"id":"https://openalex.org/keywords/edge-device","display_name":"Edge device","score":0.42080622911453247},{"id":"https://openalex.org/keywords/kernel","display_name":"Kernel (algebra)","score":0.4193729758262634},{"id":"https://openalex.org/keywords/computer-engineering","display_name":"Computer engineering","score":0.3589648902416229},{"id":"https://openalex.org/keywords/computer-hardware","display_name":"Computer hardware","score":0.35267430543899536},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.20243871212005615},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.10750725865364075},{"id":"https://openalex.org/keywords/wireless","display_name":"Wireless","score":0.09577682614326477},{"id":"https://openalex.org/keywords/cloud-computing","display_name":"Cloud computing","score":0.081809401512146}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8092639446258545},{"id":"https://openalex.org/C42935608","wikidata":"https://www.wikidata.org/wiki/Q190411","display_name":"Field-programmable gate array","level":2,"score":0.6871846914291382},{"id":"https://openalex.org/C2777210771","wikidata":"https://www.wikidata.org/wiki/Q4927124","display_name":"Block (permutation group theory)","level":2,"score":0.6020540595054626},{"id":"https://openalex.org/C157764524","wikidata":"https://www.wikidata.org/wiki/Q1383412","display_name":"Throughput","level":3,"score":0.5338066220283508},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.5066131353378296},{"id":"https://openalex.org/C45374587","wikidata":"https://www.wikidata.org/wiki/Q12525525","display_name":"Computation","level":2,"score":0.49803829193115234},{"id":"https://openalex.org/C81363708","wikidata":"https://www.wikidata.org/wiki/Q17084460","display_name":"Convolutional neural network","level":2,"score":0.4709010422229767},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.45424872636795044},{"id":"https://openalex.org/C138236772","wikidata":"https://www.wikidata.org/wiki/Q25098575","display_name":"Edge device","level":3,"score":0.42080622911453247},{"id":"https://openalex.org/C74193536","wikidata":"https://www.wikidata.org/wiki/Q574844","display_name":"Kernel (algebra)","level":2,"score":0.4193729758262634},{"id":"https://openalex.org/C113775141","wikidata":"https://www.wikidata.org/wiki/Q428691","display_name":"Computer engineering","level":1,"score":0.3589648902416229},{"id":"https://openalex.org/C9390403","wikidata":"https://www.wikidata.org/wiki/Q3966","display_name":"Computer hardware","level":1,"score":0.35267430543899536},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.20243871212005615},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.10750725865364075},{"id":"https://openalex.org/C555944384","wikidata":"https://www.wikidata.org/wiki/Q249","display_name":"Wireless","level":2,"score":0.09577682614326477},{"id":"https://openalex.org/C79974875","wikidata":"https://www.wikidata.org/wiki/Q483639","display_name":"Cloud computing","level":2,"score":0.081809401512146},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0},{"id":"https://openalex.org/C2524010","wikidata":"https://www.wikidata.org/wiki/Q8087","display_name":"Geometry","level":1,"score":0.0},{"id":"https://openalex.org/C76155785","wikidata":"https://www.wikidata.org/wiki/Q418","display_name":"Telecommunications","level":1,"score":0.0},{"id":"https://openalex.org/C114614502","wikidata":"https://www.wikidata.org/wiki/Q76592","display_name":"Combinatorics","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3543622.3573171","is_oa":false,"landing_page_url":"http://dx.doi.org/10.1145/3543622.3573171","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2023 ACM/SIGDA International Symposium on Field Programmable Gate Arrays","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W2111241003","https://openalex.org/W4391382578","https://openalex.org/W3176282186","https://openalex.org/W4387489555","https://openalex.org/W2973622361","https://openalex.org/W4293053895","https://openalex.org/W4288024917","https://openalex.org/W2983364019","https://openalex.org/W2998183476","https://openalex.org/W3215372595"],"abstract_inverted_index":{"Training":[0],"convolutional":[1],"neural":[2],"networks":[3],"remains":[4],"a":[5,39,63,70,77,133,152,157,192,200],"challenge":[6],"on":[7,42,187,199],"resource-limited":[8],"edge":[9],"devices":[10],"due":[11],"to":[12,33,68,89,109,137],"its":[13],"intensive":[14],"computations,":[15],"large":[16],"storage":[17],"requirements,":[18],"and":[19,26,44,83,102,117,141,147,151,211,215],"high":[20,31],"bandwidth.":[21,45],"Error":[22],"back-propagation,":[23],"gradient":[24,163],"generation,":[25],"weight":[27,158],"update":[28],"usually":[29],"require":[30],"precision":[32,112],"guarantee":[34],"model":[35],"accuracy,":[36],"which":[37],"places":[38],"further":[40],"burden":[41],"computation":[43],"This":[46],"paper":[47],"presents":[48],"the":[49,92,120,145,175,188],"first":[50,61],"parallel":[51],"FPGA":[52,204],"CNN":[53],"training":[54,184],"accelerator":[55,198],"with":[56,76,139,156,191,214],"block":[57,73,136,155],"minifloat":[58,74],"datatypes.":[59],"We":[60],"propose":[62,132],"heuristic":[64],"bit-width":[65],"allocation":[66],"technique":[67],"derive":[69],"unified":[71,134],"8-bit":[72],"format":[75,95],"sign":[78],"bit,":[79],"2":[80],"exponent":[81],"bits,":[82],"5":[84],"mantissa":[85],"bits.":[86],"In":[87,129],"contrast":[88],"previous":[90],"techniques,":[91],"same":[93],"data":[94],"is":[96,115],"used":[97],"for":[98,162,174],"weights,":[99],"activations,":[100],"errors,":[101],"gradients.":[103],"Using":[104],"this":[105,171],"format,":[106],"accuracy":[107],"similar":[108],"32-bit":[110],"single":[111],"floating":[113],"point":[114],"achieved":[116],"thus":[118],"simplifies":[119],"FPGA-based":[121],"designs":[122],"of":[123,185,195,209],"computational":[124],"units":[125],"such":[126],"as":[127],"multiply-and-add.":[128],"addition,":[130],"we":[131],"Conv":[135,140,143,154,166],"deal":[138],"transposed":[142],"in":[144,180],"forward":[146],"backward":[148],"paths":[149],"respectively;":[150],"dilated":[153],"kernel":[159],"partition":[160],"scheme":[161],"generation.":[164],"Both":[165],"blocks":[167],"support":[168],"non-unit":[169],"stride,":[170],"being":[172],"crucial":[173],"residual":[176],"connections":[177],"that":[178],"appear":[179],"modern":[181],"CNNs.":[182],"For":[183],"ResNet20":[186],"CIFAR-10":[189],"dataset":[190],"batch":[193,217],"size":[194],"1,":[196],"our":[197],"Xilinx":[201],"Ultrascale+":[202],"ZCU102":[203],"achieves":[205],"state-of-the-art":[206],"single-batch":[207],"throughput":[208],"144.64":[210],"192.68":[212],"GOPs":[213],"without":[216],"normalisation":[218],"layers":[219],"respectively.":[220]},"counts_by_year":[],"updated_date":"2025-12-22T23:10:17.713674","created_date":"2025-10-10T00:00:00"}
