{"id":"https://openalex.org/W3022125282","doi":"https://doi.org/10.3233/apc200047","title":"Design of an FPGA-Based Matrix Multiplier with Task Parallelism","display_name":"Design of an FPGA-Based Matrix Multiplier with Task Parallelism","publication_year":2020,"publication_date":"2020-01-01","ids":{"openalex":"https://openalex.org/W3022125282","doi":"https://doi.org/10.3233/apc200047","mag":"3022125282"},"language":"en","primary_location":{"id":"doi:10.3233/apc200047","is_oa":true,"landing_page_url":"https://doi.org/10.3233/apc200047","pdf_url":"https://ebooks.iospress.nl/pdf/doi/10.3233/APC200047","source":{"id":"https://openalex.org/S4210175178","display_name":"Advances in parallel computing","issn_l":"0927-5452","issn":["0927-5452","1879-808X"],"is_oa":true,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310320990","host_organization_name":"Elsevier BV","host_organization_lineage":["https://openalex.org/P4310320990"],"host_organization_lineage_names":["Elsevier BV"],"type":"book series"},"license":"cc-by-nc","license_id":"https://openalex.org/licenses/cc-by-nc","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Advances in Parallel Computing","raw_type":"book-chapter"},"type":"book-chapter","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"diamond","oa_url":"https://ebooks.iospress.nl/pdf/doi/10.3233/APC200047","any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5052903922","display_name":"Yiyu Tan","orcid":"https://orcid.org/0000-0002-3566-4507"},"institutions":[{"id":"https://openalex.org/I4210129730","display_name":"RIKEN Center for Computational Science","ror":"https://ror.org/03r519674","country_code":"JP","type":"facility","lineage":["https://openalex.org/I4210110652","https://openalex.org/I4210129730"]}],"countries":["JP"],"is_corresponding":true,"raw_author_name":"Tan Yiyu","raw_affiliation_strings":["RIKEN Center for Computational Science, Kobe, Hyogo, Japan"],"affiliations":[{"raw_affiliation_string":"RIKEN Center for Computational Science, Kobe, Hyogo, Japan","institution_ids":["https://openalex.org/I4210129730"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5086152822","display_name":"Toshiyuki Imamura","orcid":"https://orcid.org/0000-0003-1601-9710"},"institutions":[{"id":"https://openalex.org/I4210129730","display_name":"RIKEN Center for Computational Science","ror":"https://ror.org/03r519674","country_code":"JP","type":"facility","lineage":["https://openalex.org/I4210110652","https://openalex.org/I4210129730"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Imamura Toshiyuki","raw_affiliation_strings":["RIKEN Center for Computational Science, Kobe, Hyogo, Japan"],"affiliations":[{"raw_affiliation_string":"RIKEN Center for Computational Science, Kobe, Hyogo, Japan","institution_ids":["https://openalex.org/I4210129730"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5035972892","display_name":"Daichi Mukunoki","orcid":"https://orcid.org/0000-0002-0051-6811"},"institutions":[{"id":"https://openalex.org/I4210129730","display_name":"RIKEN Center for Computational Science","ror":"https://ror.org/03r519674","country_code":"JP","type":"facility","lineage":["https://openalex.org/I4210110652","https://openalex.org/I4210129730"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Mukunoki Daichi","raw_affiliation_strings":["RIKEN Center for Computational Science, Kobe, Hyogo, Japan"],"affiliations":[{"raw_affiliation_string":"RIKEN Center for Computational Science, Kobe, Hyogo, Japan","institution_ids":["https://openalex.org/I4210129730"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5052903922"],"corresponding_institution_ids":["https://openalex.org/I4210129730"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":true,"cited_by_count":2,"citation_normalized_percentile":{"value":0.13039216,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":95,"max":96},"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.9988999962806702,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.9988999962806702,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10829","display_name":"Interconnection Networks and Systems","score":0.9948999881744385,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10363","display_name":"Low-power high-performance VLSI design","score":0.9846000075340271,"subfield":{"id":"https://openalex.org/subfields/2208","display_name":"Electrical and Electronic Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.7957064509391785},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7510509490966797},{"id":"https://openalex.org/keywords/flops","display_name":"FLOPS","score":0.7308875322341919},{"id":"https://openalex.org/keywords/field-programmable-gate-array","display_name":"Field-programmable gate array","score":0.614325761795044},{"id":"https://openalex.org/keywords/computation","display_name":"Computation","score":0.5859931111335754},{"id":"https://openalex.org/keywords/matrix-multiplication","display_name":"Matrix multiplication","score":0.570180356502533},{"id":"https://openalex.org/keywords/multiplier","display_name":"Multiplier (economics)","score":0.5463635325431824},{"id":"https://openalex.org/keywords/data-parallelism","display_name":"Data parallelism","score":0.5223070979118347},{"id":"https://openalex.org/keywords/throughput","display_name":"Throughput","score":0.4375326931476593},{"id":"https://openalex.org/keywords/floating-point","display_name":"Floating point","score":0.425687313079834},{"id":"https://openalex.org/keywords/efficient-energy-use","display_name":"Efficient energy use","score":0.41408440470695496},{"id":"https://openalex.org/keywords/computational-science","display_name":"Computational science","score":0.3542853593826294},{"id":"https://openalex.org/keywords/computer-hardware","display_name":"Computer hardware","score":0.32227662205696106},{"id":"https://openalex.org/keywords/parallelism","display_name":"Parallelism (grammar)","score":0.18024003505706787},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.16094431281089783},{"id":"https://openalex.org/keywords/engineering","display_name":"Engineering","score":0.08168566226959229},{"id":"https://openalex.org/keywords/operating-system","display_name":"Operating system","score":0.06754085421562195}],"concepts":[{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.7957064509391785},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7510509490966797},{"id":"https://openalex.org/C3826847","wikidata":"https://www.wikidata.org/wiki/Q188768","display_name":"FLOPS","level":2,"score":0.7308875322341919},{"id":"https://openalex.org/C42935608","wikidata":"https://www.wikidata.org/wiki/Q190411","display_name":"Field-programmable gate array","level":2,"score":0.614325761795044},{"id":"https://openalex.org/C45374587","wikidata":"https://www.wikidata.org/wiki/Q12525525","display_name":"Computation","level":2,"score":0.5859931111335754},{"id":"https://openalex.org/C17349429","wikidata":"https://www.wikidata.org/wiki/Q1049914","display_name":"Matrix multiplication","level":3,"score":0.570180356502533},{"id":"https://openalex.org/C124584101","wikidata":"https://www.wikidata.org/wiki/Q1053266","display_name":"Multiplier (economics)","level":2,"score":0.5463635325431824},{"id":"https://openalex.org/C61483411","wikidata":"https://www.wikidata.org/wiki/Q3124522","display_name":"Data parallelism","level":3,"score":0.5223070979118347},{"id":"https://openalex.org/C157764524","wikidata":"https://www.wikidata.org/wiki/Q1383412","display_name":"Throughput","level":3,"score":0.4375326931476593},{"id":"https://openalex.org/C84211073","wikidata":"https://www.wikidata.org/wiki/Q117879","display_name":"Floating point","level":2,"score":0.425687313079834},{"id":"https://openalex.org/C2742236","wikidata":"https://www.wikidata.org/wiki/Q924713","display_name":"Efficient energy use","level":2,"score":0.41408440470695496},{"id":"https://openalex.org/C459310","wikidata":"https://www.wikidata.org/wiki/Q117801","display_name":"Computational science","level":1,"score":0.3542853593826294},{"id":"https://openalex.org/C9390403","wikidata":"https://www.wikidata.org/wiki/Q3966","display_name":"Computer hardware","level":1,"score":0.32227662205696106},{"id":"https://openalex.org/C2781172179","wikidata":"https://www.wikidata.org/wiki/Q853109","display_name":"Parallelism (grammar)","level":2,"score":0.18024003505706787},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.16094431281089783},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.08168566226959229},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.06754085421562195},{"id":"https://openalex.org/C139719470","wikidata":"https://www.wikidata.org/wiki/Q39680","display_name":"Macroeconomics","level":1,"score":0.0},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0},{"id":"https://openalex.org/C84114770","wikidata":"https://www.wikidata.org/wiki/Q46344","display_name":"Quantum","level":2,"score":0.0},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C555944384","wikidata":"https://www.wikidata.org/wiki/Q249","display_name":"Wireless","level":2,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C119599485","wikidata":"https://www.wikidata.org/wiki/Q43035","display_name":"Electrical engineering","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.3233/apc200047","is_oa":true,"landing_page_url":"https://doi.org/10.3233/apc200047","pdf_url":"https://ebooks.iospress.nl/pdf/doi/10.3233/APC200047","source":{"id":"https://openalex.org/S4210175178","display_name":"Advances in parallel computing","issn_l":"0927-5452","issn":["0927-5452","1879-808X"],"is_oa":true,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310320990","host_organization_name":"Elsevier BV","host_organization_lineage":["https://openalex.org/P4310320990"],"host_organization_lineage_names":["Elsevier BV"],"type":"book series"},"license":"cc-by-nc","license_id":"https://openalex.org/licenses/cc-by-nc","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Advances in Parallel Computing","raw_type":"book-chapter"}],"best_oa_location":{"id":"doi:10.3233/apc200047","is_oa":true,"landing_page_url":"https://doi.org/10.3233/apc200047","pdf_url":"https://ebooks.iospress.nl/pdf/doi/10.3233/APC200047","source":{"id":"https://openalex.org/S4210175178","display_name":"Advances in parallel computing","issn_l":"0927-5452","issn":["0927-5452","1879-808X"],"is_oa":true,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310320990","host_organization_name":"Elsevier BV","host_organization_lineage":["https://openalex.org/P4310320990"],"host_organization_lineage_names":["Elsevier BV"],"type":"book series"},"license":"cc-by-nc","license_id":"https://openalex.org/licenses/cc-by-nc","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Advances in Parallel Computing","raw_type":"book-chapter"},"sustainable_development_goals":[{"score":0.8999999761581421,"id":"https://metadata.un.org/sdg/7","display_name":"Affordable and clean energy"}],"awards":[],"funders":[{"id":"https://openalex.org/F4320317898","display_name":"Foundation for Computational Science","ror":null}],"has_content":{"pdf":true,"grobid_xml":true},"content_urls":{"pdf":"https://content.openalex.org/works/W3022125282.pdf","grobid_xml":"https://content.openalex.org/works/W3022125282.grobid-xml"},"referenced_works_count":17,"referenced_works":["https://openalex.org/W1582059966","https://openalex.org/W1989881993","https://openalex.org/W1995812446","https://openalex.org/W2020704575","https://openalex.org/W2023965391","https://openalex.org/W2056882228","https://openalex.org/W2059117794","https://openalex.org/W2061624656","https://openalex.org/W2073510583","https://openalex.org/W2132367502","https://openalex.org/W2143757465","https://openalex.org/W2575866180","https://openalex.org/W2786604785","https://openalex.org/W2901970548","https://openalex.org/W6600297719","https://openalex.org/W6600339457","https://openalex.org/W6604869935"],"related_works":["https://openalex.org/W4315697128","https://openalex.org/W4382323155","https://openalex.org/W2560894929","https://openalex.org/W2156524298","https://openalex.org/W2380696053","https://openalex.org/W4296617584","https://openalex.org/W2152567198","https://openalex.org/W4232263740","https://openalex.org/W2106942255","https://openalex.org/W4302004660"],"abstract_inverted_index":{"Matrix":[0],"multiplication":[1],"requires":[2],"computer":[3],"systems":[4],"have":[5],"huge":[6],"computing":[7],"capability":[8],"and":[9,29,41,60,91,115,127,147,153,159],"data":[10,65,75,104],"throughputs":[11],"as":[12],"problem":[13],"size":[14],"is":[15,27,47,171,179,228],"increased.":[16],"In":[17],"this":[18],"research,":[19],"an":[20,128],"OpenCL-based":[21],"matrix":[22,45,81,138,189],"multiplier":[23,46,82,139,190],"with":[24,54,98,103,122],"task":[25],"parallelism":[26,105],"designed":[28],"implemented":[30],"by":[31,142,154,214],"using":[32],"the":[33,50,64,79,99,108,112,136,168,175,183,186,195,199,203,207,222,226],"FPGA":[34,176],"board":[35],"DE5a-NET":[36],"to":[37,70],"improve":[38],"computation":[39,72,89,151,196],"throughput":[40,90,197],"energy":[42,95,163,218],"efficiency.":[43,96],"The":[44],"based":[48],"on":[49,106,119,206],"systolic":[51],"array":[52],"architecture":[53],"10":[55],"16":[56],"processing":[57],"elements":[58],"(PEs),":[59],"all":[61],"modules":[62,67],"except":[63],"loading":[66],"are":[68,76],"autorun":[69],"hide":[71],"overhead.":[73],"When":[74],"single-precision":[77],"floating-point,":[78],"proposed":[80,137,187],"averagely":[83,140],"achieves":[84],"about":[85],"785":[86],"GFLOPs":[87],"in":[88,94,111,150,162,174,182,202,217],"66.75":[92],"GFLOPs/W":[93],"Compared":[97],"Intel's":[100],"OpenCL":[101],"example":[102],"FPGA,":[107],"SGEMM":[109,200],"routines":[110],"Intel":[113,129],"MKL":[114],"OpenBLAS":[116],"libraries":[117],"executed":[118],"a":[120],"desktop":[121],"32":[123],"GB":[124],"DDR4":[125],"RAMs":[126],"i7-6800K":[130],"processor":[131],"running":[132],"at":[133],"3.4":[134],"GHz,":[135],"outperforms":[141,213],"3.2":[143],"times,":[144,146,156,158],"1.3":[145],"1.6":[148],"times":[149,161,216],"throughput,":[152],"2.9":[155],"10.5":[157],"11.8":[160],"efficiency,":[164],"respectively,":[165],"even":[166,220],"though":[167,221],"fabrication":[169,223],"technology":[170,224],"20":[172],"nm":[173,181],"while":[177],"it":[178,212],"14":[180],"CPU.":[184],"Although":[185],"FPGA-based":[188],"only":[191],"delivers":[192],"6.5%":[193],"of":[194,198,225],"routine":[201],"cuBLAS":[204],"performed":[205],"Nvidia":[208],"TITAN":[209],"V":[210],"GPU,":[211],"1.2":[215],"efficiency":[219],"GPU":[227],"12":[229],"nm.":[230]},"counts_by_year":[{"year":2025,"cited_by_count":2}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2020-05-13T00:00:00"}
