{"id":"https://openalex.org/W4405022468","doi":"https://doi.org/10.1109/tcad.2024.3511343","title":"Parallel Accurate Minifloat MACCs for Neural Network Inference on Versal FPGAs","display_name":"Parallel Accurate Minifloat MACCs for Neural Network Inference on Versal FPGAs","publication_year":2024,"publication_date":"2024-12-04","ids":{"openalex":"https://openalex.org/W4405022468","doi":"https://doi.org/10.1109/tcad.2024.3511343"},"language":"en","primary_location":{"id":"doi:10.1109/tcad.2024.3511343","is_oa":true,"landing_page_url":"https://doi.org/10.1109/tcad.2024.3511343","pdf_url":null,"source":{"id":"https://openalex.org/S100835903","display_name":"IEEE Transactions on Computer-Aided Design of Integrated Circuits and Systems","issn_l":"0278-0070","issn":["0278-0070","1937-4151"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Computer-Aided Design of Integrated Circuits and Systems","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"hybrid","oa_url":"https://doi.org/10.1109/tcad.2024.3511343","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5021149679","display_name":"Hans Jakob Damsgaard","orcid":"https://orcid.org/0000-0001-8409-0282"},"institutions":[{"id":"https://openalex.org/I4210133110","display_name":"Tampere University","ror":null,"country_code":"FI","type":null,"lineage":["https://openalex.org/I4210133110"]},{"id":"https://openalex.org/I166825849","display_name":"Tampere University","ror":"https://ror.org/033003e23","country_code":"FI","type":"education","lineage":["https://openalex.org/I166825849"]}],"countries":["FI"],"is_corresponding":true,"raw_author_name":"Hans Jakob Damsgaard","raw_affiliation_strings":["SoC Hub Research Centre and the Wireless Research Centre, Tampere University, Tampere, Finland","Faculty of Information Technology and Communication Sciences, Electrical Engineering Unit, Tampere University, Tampere, Finland"],"affiliations":[{"raw_affiliation_string":"SoC Hub Research Centre and the Wireless Research Centre, Tampere University, Tampere, Finland","institution_ids":["https://openalex.org/I4210133110"]},{"raw_affiliation_string":"Faculty of Information Technology and Communication Sciences, Electrical Engineering Unit, Tampere University, Tampere, Finland","institution_ids":["https://openalex.org/I166825849"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5098936747","display_name":"Konstantin J. Ho\u00dffeld","orcid":"https://orcid.org/0009-0005-9542-3317"},"institutions":[{"id":"https://openalex.org/I4210158945","display_name":"Advanced Mask Technology Center (Germany)","ror":"https://ror.org/04pnnsh51","country_code":"DE","type":"company","lineage":["https://openalex.org/I4210158945"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Konstantin J. Ho\u00dffeld","raw_affiliation_strings":["AMD, Research and Advanced Development, Dresden, Germany","AMD Research, Dresden, Germany"],"affiliations":[{"raw_affiliation_string":"AMD, Research and Advanced Development, Dresden, Germany","institution_ids":["https://openalex.org/I4210158945"]},{"raw_affiliation_string":"AMD Research, Dresden, Germany","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5035297149","display_name":"Jari Nurmi","orcid":"https://orcid.org/0000-0003-2169-4606"},"institutions":[{"id":"https://openalex.org/I4210133110","display_name":"Tampere University","ror":null,"country_code":"FI","type":null,"lineage":["https://openalex.org/I4210133110"]},{"id":"https://openalex.org/I166825849","display_name":"Tampere University","ror":"https://ror.org/033003e23","country_code":"FI","type":"education","lineage":["https://openalex.org/I166825849"]}],"countries":["FI"],"is_corresponding":false,"raw_author_name":"Jari Nurmi","raw_affiliation_strings":["SoC Hub Research Centre and the Wireless Research Centre, Tampere University, Tampere, Finland","Faculty of Information Technology and Communication Sciences, Electrical Engineering Unit, Tampere University, Tampere, Finland"],"affiliations":[{"raw_affiliation_string":"SoC Hub Research Centre and the Wireless Research Centre, Tampere University, Tampere, Finland","institution_ids":["https://openalex.org/I4210133110"]},{"raw_affiliation_string":"Faculty of Information Technology and Communication Sciences, Electrical Engineering Unit, Tampere University, Tampere, Finland","institution_ids":["https://openalex.org/I166825849"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5055093027","display_name":"Thomas B. Preu\u00dfer","orcid":"https://orcid.org/0000-0003-3998-7896"},"institutions":[{"id":"https://openalex.org/I4210158945","display_name":"Advanced Mask Technology Center (Germany)","ror":"https://ror.org/04pnnsh51","country_code":"DE","type":"company","lineage":["https://openalex.org/I4210158945"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Thomas B. Preu\u00dfer","raw_affiliation_strings":["AMD, Research and Advanced Development, Dresden, Germany","AMD Research, Dresden, Germany"],"affiliations":[{"raw_affiliation_string":"AMD, Research and Advanced Development, Dresden, Germany","institution_ids":["https://openalex.org/I4210158945"]},{"raw_affiliation_string":"AMD Research, Dresden, Germany","institution_ids":[]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5021149679"],"corresponding_institution_ids":["https://openalex.org/I166825849","https://openalex.org/I4210133110"],"apc_list":null,"apc_paid":null,"fwci":0.7088,"has_fulltext":false,"cited_by_count":2,"citation_normalized_percentile":{"value":0.77549637,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":91,"max":99},"biblio":{"volume":"44","issue":"6","first_page":"2181","last_page":"2194"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10320","display_name":"Neural Networks and Applications","score":0.9175000190734863,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10320","display_name":"Neural Networks and Applications","score":0.9175000190734863,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/field-programmable-gate-array","display_name":"Field-programmable gate array","score":0.5460082292556763},{"id":"https://openalex.org/keywords/artificial-neural-network","display_name":"Artificial neural network","score":0.5455271005630493},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.5296155214309692},{"id":"https://openalex.org/keywords/inference","display_name":"Inference","score":0.5056130886077881},{"id":"https://openalex.org/keywords/embedded-system","display_name":"Embedded system","score":0.2423628568649292},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.19458651542663574}],"concepts":[{"id":"https://openalex.org/C42935608","wikidata":"https://www.wikidata.org/wiki/Q190411","display_name":"Field-programmable gate array","level":2,"score":0.5460082292556763},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.5455271005630493},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.5296155214309692},{"id":"https://openalex.org/C2776214188","wikidata":"https://www.wikidata.org/wiki/Q408386","display_name":"Inference","level":2,"score":0.5056130886077881},{"id":"https://openalex.org/C149635348","wikidata":"https://www.wikidata.org/wiki/Q193040","display_name":"Embedded system","level":1,"score":0.2423628568649292},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.19458651542663574}],"mesh":[],"locations_count":3,"locations":[{"id":"doi:10.1109/tcad.2024.3511343","is_oa":true,"landing_page_url":"https://doi.org/10.1109/tcad.2024.3511343","pdf_url":null,"source":{"id":"https://openalex.org/S100835903","display_name":"IEEE Transactions on Computer-Aided Design of Integrated Circuits and Systems","issn_l":"0278-0070","issn":["0278-0070","1937-4151"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Computer-Aided Design of Integrated Circuits and Systems","raw_type":"journal-article"},{"id":"pmh:oai:trepo.tuni.fi:10024/212102","is_oa":true,"landing_page_url":"https://trepo.tuni.fi/handle/10024/212102","pdf_url":null,"source":{"id":"https://openalex.org/S7407055260","display_name":"Trepo - Institutional Repository of Tampere University","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},{"id":"pmh:oai:zenodo.org:14312383","is_oa":true,"landing_page_url":"https://doi.org/10.1109/TCAD.2024.3511343","pdf_url":null,"source":{"id":"https://openalex.org/S4306400562","display_name":"Zenodo (CERN European Organization for Nuclear Research)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I67311998","host_organization_name":"European Organization for Nuclear Research","host_organization_lineage":["https://openalex.org/I67311998"],"host_organization_lineage_names":[],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"IEEE Transactions on Computer-Aided Design of Integrated Circuits and Systems, (2024-12-04)","raw_type":"info:eu-repo/semantics/article"}],"best_oa_location":{"id":"doi:10.1109/tcad.2024.3511343","is_oa":true,"landing_page_url":"https://doi.org/10.1109/tcad.2024.3511343","pdf_url":null,"source":{"id":"https://openalex.org/S100835903","display_name":"IEEE Transactions on Computer-Aided Design of Integrated Circuits and Systems","issn_l":"0278-0070","issn":["0278-0070","1937-4151"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Computer-Aided Design of Integrated Circuits and Systems","raw_type":"journal-article"},"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G773759176","display_name":null,"funder_award_id":"Heterogeneous Accelerated Compute Cluster (HACC) P","funder_id":"https://openalex.org/F4320307757","funder_display_name":"Advanced Micro Devices"},{"id":"https://openalex.org/G824759304","display_name":null,"funder_award_id":"APROPOS (956090)","funder_id":"https://openalex.org/F4320332999","funder_display_name":"Horizon 2020 Framework Programme"}],"funders":[{"id":"https://openalex.org/F4320307757","display_name":"Advanced Micro Devices","ror":"https://ror.org/04kd6c783"},{"id":"https://openalex.org/F4320332999","display_name":"Horizon 2020 Framework Programme","ror":"https://ror.org/00k4n6c32"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":41,"referenced_works":["https://openalex.org/W1506162491","https://openalex.org/W1527834376","https://openalex.org/W2030898836","https://openalex.org/W2030955243","https://openalex.org/W2046885698","https://openalex.org/W2098158887","https://openalex.org/W2104266867","https://openalex.org/W2125385721","https://openalex.org/W2131812496","https://openalex.org/W2132094037","https://openalex.org/W2166000469","https://openalex.org/W2194775991","https://openalex.org/W2236988532","https://openalex.org/W2344290618","https://openalex.org/W2395134749","https://openalex.org/W2762374354","https://openalex.org/W2770593197","https://openalex.org/W2891946740","https://openalex.org/W2981751377","https://openalex.org/W2981991292","https://openalex.org/W2985030833","https://openalex.org/W4205729510","https://openalex.org/W4210258659","https://openalex.org/W4242188747","https://openalex.org/W4283801046","https://openalex.org/W4319996493","https://openalex.org/W4321637080","https://openalex.org/W4391718800","https://openalex.org/W4403278766","https://openalex.org/W6736972661","https://openalex.org/W6751552315","https://openalex.org/W6753770798","https://openalex.org/W6756007670","https://openalex.org/W6762484958","https://openalex.org/W6767032739","https://openalex.org/W6776767859","https://openalex.org/W6785662278","https://openalex.org/W6791940793","https://openalex.org/W6842145542","https://openalex.org/W6846164622","https://openalex.org/W6860718643"],"related_works":["https://openalex.org/W4391375266","https://openalex.org/W2899084033","https://openalex.org/W2748952813","https://openalex.org/W2111241003","https://openalex.org/W2390279801","https://openalex.org/W4391913857","https://openalex.org/W2358668433","https://openalex.org/W2355315220","https://openalex.org/W4200391368","https://openalex.org/W2210979487"],"abstract_inverted_index":{"Machine":[0],"learning":[1],"(ML)":[2],"is":[3],"ubiquitous":[4],"in":[5,209],"contemporary":[6],"applications.":[7],"Its":[8],"need":[9],"for":[10,184],"efficient":[11],"acceleration":[12],"has":[13],"driven":[14],"vast":[15],"research":[16],"efforts":[17],"into":[18,45,188],"the":[19,57,64,74,85,118,122,146,162],"quantization":[20],"of":[21,33,41,59,67,84,91,121,149,212],"neural":[22],"networks":[23],"with":[24,30,81,113,139,179],"low-precision":[25],"numerical":[26],"formats.":[27],"Models":[28],"quantized":[29,44],"minifloat":[31,70,93,123,168],"formats":[32],"eight":[34],"or":[35],"fewer":[36],"bits":[37],"have":[38],"proven":[39],"capable":[40],"outperforming":[42],"models":[43],"same-size":[46,180],"integers.":[47],"However,":[48],"unlike":[49],"integers,":[50],"minifloats":[51],"require":[52],"accurate":[53,69],"accumulation":[54],"to":[55,101,172,194,205],"prevent":[56],"introduction":[58],"rounding":[60],"errors.":[61],"We":[62,79],"explore":[63],"design":[65],"space":[66],"parallel":[68,103],"multiply-accumulators":[71],"(MACCs)":[72],"targeting":[73],"AMD":[75],"VersalTM":[76],"FPGA":[77],"fabric.":[78],"experiment":[80],"three":[82],"variations":[83],"multiply-and-shift":[86],"and":[87,127,154,192,222],"adder":[88],"tree":[89],"components":[90],"a":[92,102,185,189],"MACC.":[94,105],"For":[95],"comparison,":[96,135],"we":[97,165],"apply":[98],"similar":[99],"alterations":[100],"integer":[104,150,177],"Our":[106,201],"results":[107],"show":[108],"that":[109,167],"custom":[110,136],"compressor":[111,137],"trees":[112,138],"external":[114],"sign-inversion":[115],"gates":[116,144],"reduce":[117,145],"mean":[119,147],"area":[120,148],"MACCs":[124,151,169],"by":[125,132,152,159],"17.7%":[126],"increase":[128,155],"their":[129,156,210],"clock":[130,157],"frequency":[131,158],"16.2%.":[133],"In":[134],"absorbed":[140],"partial":[141],"product":[142],"generation":[143],"28.1%":[153],"3.60%.":[160],"Comparing":[161],"best-performing":[163],"designs,":[164],"observe":[166],"consume":[170],"20%":[171],"180%":[173],"more":[174,196],"resources":[175,197],"than":[176],"ones":[178],"operands":[181],"without":[182],"accounting":[183],"conversion":[186],"back":[187],"floating-point":[190],"format,":[191],"60%":[193],"300%":[195],"when":[198,218],"including":[199],"it.":[200],"data":[202],"enable":[203],"engineers":[204],"make":[206],"informed":[207],"decisions":[208],"designs":[211],"deeply":[213],"integrated":[214],"embedded":[215],"ML":[216],"solutions":[217],"trading":[219],"off":[220],"training":[221],"fine-tuning":[223],"effort":[224],"versus":[225],"resource":[226],"cost.":[227]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":1}],"updated_date":"2026-03-20T23:20:44.827607","created_date":"2025-10-10T00:00:00"}
