{"id":"https://openalex.org/W4410985823","doi":"https://doi.org/10.1109/tvlsi.2025.3571677","title":"A Scalable FPGA Architecture With Adaptive Memory Utilization for GEMM-Based Operations","display_name":"A Scalable FPGA Architecture With Adaptive Memory Utilization for GEMM-Based Operations","publication_year":2025,"publication_date":"2025-06-03","ids":{"openalex":"https://openalex.org/W4410985823","doi":"https://doi.org/10.1109/tvlsi.2025.3571677"},"language":"en","primary_location":{"id":"doi:10.1109/tvlsi.2025.3571677","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tvlsi.2025.3571677","pdf_url":null,"source":{"id":"https://openalex.org/S37538908","display_name":"IEEE Transactions on Very Large Scale Integration (VLSI) Systems","issn_l":"1063-8210","issn":["1063-8210","1557-9999"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Very Large Scale Integration (VLSI) Systems","raw_type":"journal-article"},"type":"article","indexed_in":["arxiv","crossref"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/2510.08137","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5064384272","display_name":"\u0391\u03bd\u03b1\u03c3\u03c4\u03ac\u03c3\u03b9\u03bf\u03c2 \u03a0\u03b5\u03c4\u03c1\u03cc\u03c0\u03bf\u03c5\u03bb\u03bf\u03c2","orcid":"https://orcid.org/0000-0003-1669-5233"},"institutions":[{"id":"https://openalex.org/I174878644","display_name":"University of Patras","ror":"https://ror.org/017wvtq80","country_code":"GR","type":"education","lineage":["https://openalex.org/I174878644"]}],"countries":["GR"],"is_corresponding":false,"raw_author_name":"Anastasios Petropoulos","raw_affiliation_strings":["Department of Electrical and Computer Engineering, University of Patras, Patras, Greece"],"raw_orcid":"https://orcid.org/0000-0003-1669-5233","affiliations":[{"raw_affiliation_string":"Department of Electrical and Computer Engineering, University of Patras, Patras, Greece","institution_ids":["https://openalex.org/I174878644"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5060135944","display_name":"Theodore Antonakopoulos","orcid":"https://orcid.org/0000-0002-7863-1051"},"institutions":[{"id":"https://openalex.org/I174878644","display_name":"University of Patras","ror":"https://ror.org/017wvtq80","country_code":"GR","type":"education","lineage":["https://openalex.org/I174878644"]}],"countries":["GR"],"is_corresponding":false,"raw_author_name":"Theodore Antonakopoulos","raw_affiliation_strings":["Department of Electrical and Computer Engineering, University of Patras, Patras, Greece"],"raw_orcid":"https://orcid.org/0000-0002-7863-1051","affiliations":[{"raw_affiliation_string":"Department of Electrical and Computer Engineering, University of Patras, Patras, Greece","institution_ids":["https://openalex.org/I174878644"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":2,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":3.4756,"has_fulltext":true,"cited_by_count":2,"citation_normalized_percentile":{"value":0.91408236,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":91,"max":98},"biblio":{"volume":"33","issue":"8","first_page":"2334","last_page":"2338"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.9993000030517578,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.9993000030517578,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10904","display_name":"Embedded Systems Design Techniques","score":0.9986000061035156,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10829","display_name":"Interconnection Networks and Systems","score":0.9854000210762024,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-architecture","display_name":"Computer architecture","score":0.7905737161636353},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7441344857215881},{"id":"https://openalex.org/keywords/scalability","display_name":"Scalability","score":0.7138490676879883},{"id":"https://openalex.org/keywords/architecture","display_name":"Architecture","score":0.687720537185669},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.6606865525245667},{"id":"https://openalex.org/keywords/field-programmable-gate-array","display_name":"Field-programmable gate array","score":0.6515949964523315},{"id":"https://openalex.org/keywords/distributed-computing","display_name":"Distributed computing","score":0.36459460854530334},{"id":"https://openalex.org/keywords/embedded-system","display_name":"Embedded system","score":0.26056620478630066},{"id":"https://openalex.org/keywords/operating-system","display_name":"Operating system","score":0.18247076869010925},{"id":"https://openalex.org/keywords/history","display_name":"History","score":0.06622877717018127}],"concepts":[{"id":"https://openalex.org/C118524514","wikidata":"https://www.wikidata.org/wiki/Q173212","display_name":"Computer architecture","level":1,"score":0.7905737161636353},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7441344857215881},{"id":"https://openalex.org/C48044578","wikidata":"https://www.wikidata.org/wiki/Q727490","display_name":"Scalability","level":2,"score":0.7138490676879883},{"id":"https://openalex.org/C123657996","wikidata":"https://www.wikidata.org/wiki/Q12271","display_name":"Architecture","level":2,"score":0.687720537185669},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.6606865525245667},{"id":"https://openalex.org/C42935608","wikidata":"https://www.wikidata.org/wiki/Q190411","display_name":"Field-programmable gate array","level":2,"score":0.6515949964523315},{"id":"https://openalex.org/C120314980","wikidata":"https://www.wikidata.org/wiki/Q180634","display_name":"Distributed computing","level":1,"score":0.36459460854530334},{"id":"https://openalex.org/C149635348","wikidata":"https://www.wikidata.org/wiki/Q193040","display_name":"Embedded system","level":1,"score":0.26056620478630066},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.18247076869010925},{"id":"https://openalex.org/C95457728","wikidata":"https://www.wikidata.org/wiki/Q309","display_name":"History","level":0,"score":0.06622877717018127},{"id":"https://openalex.org/C166957645","wikidata":"https://www.wikidata.org/wiki/Q23498","display_name":"Archaeology","level":1,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1109/tvlsi.2025.3571677","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tvlsi.2025.3571677","pdf_url":null,"source":{"id":"https://openalex.org/S37538908","display_name":"IEEE Transactions on Very Large Scale Integration (VLSI) Systems","issn_l":"1063-8210","issn":["1063-8210","1557-9999"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Very Large Scale Integration (VLSI) Systems","raw_type":"journal-article"},{"id":"pmh:oai:arXiv.org:2510.08137","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2510.08137","pdf_url":"https://arxiv.org/pdf/2510.08137","source":{"id":"https://openalex.org/S4393918464","display_name":"ArXiv.org","issn_l":"2331-8422","issn":["2331-8422"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:2510.08137","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2510.08137","pdf_url":"https://arxiv.org/pdf/2510.08137","source":{"id":"https://openalex.org/S4393918464","display_name":"ArXiv.org","issn_l":"2331-8422","issn":["2331-8422"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},"sustainable_development_goals":[{"score":0.5400000214576721,"display_name":"Affordable and clean energy","id":"https://metadata.un.org/sdg/7"}],"awards":[],"funders":[],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4410985823.pdf","grobid_xml":"https://content.openalex.org/works/W4410985823.grobid-xml"},"referenced_works_count":15,"referenced_works":["https://openalex.org/W2194775991","https://openalex.org/W2963511748","https://openalex.org/W2963701245","https://openalex.org/W2979455536","https://openalex.org/W2983964638","https://openalex.org/W3090482767","https://openalex.org/W3130920634","https://openalex.org/W3158338328","https://openalex.org/W4205171991","https://openalex.org/W4206572052","https://openalex.org/W4385896568","https://openalex.org/W4389082012","https://openalex.org/W4395680644","https://openalex.org/W4403278717","https://openalex.org/W4407720907"],"related_works":["https://openalex.org/W2111241003","https://openalex.org/W2355315220","https://openalex.org/W4200391368","https://openalex.org/W2210979487","https://openalex.org/W2316202402","https://openalex.org/W4401278057","https://openalex.org/W1967938402","https://openalex.org/W2386041993","https://openalex.org/W1608572506","https://openalex.org/W2038503502"],"abstract_inverted_index":{"Deep":[0],"neural":[1],"network":[2],"(DNN)":[3],"inference":[4,107],"relies":[5],"increasingly":[6],"on":[7],"specialized":[8],"hardware":[9],"for":[10],"high":[11],"computational":[12],"efficiency.":[13],"This":[14],"work":[15],"introduces":[16],"a":[17,58,104],"field-programmable":[18],"gate":[19],"array":[20],"(FPGA)-based":[21],"dynamically":[22],"configurable":[23],"accelerator":[24],"featuring":[25],"systolic":[26],"arrays":[27],"(SAs),":[28],"high-bandwidth":[29],"memory":[30],"(HBM),":[31],"and":[32,49,56,95,114],"UltraRAMs.":[33],"We":[34],"present":[35],"two":[36],"processing":[37],"unit":[38],"(PU)":[39],"configurations":[40],"with":[41],"different":[42],"computing":[43,85],"capabilities":[44],"using":[45],"the":[46,63,76],"same":[47],"interfaces":[48],"peripheral":[50],"blocks.":[51],"By":[52],"instantiating":[53],"multiple":[54],"PUs":[55],"employing":[57],"heuristic":[59],"weight":[60],"transfer":[61],"schedule,":[62],"architecture":[64,77,109],"achieves":[65],"notable":[66],"throughput":[67],"efficiency":[68],"over":[69],"prior":[70],"works.":[71],"Moreover,":[72],"we":[73],"outline":[74],"how":[75],"can":[78],"be":[79],"extended":[80],"to":[81,88,111],"emulate":[82],"analog":[83],"in-memory":[84],"(AIMC)":[86],"devices":[87],"aid":[89],"next-generation":[90],"heterogeneous":[91],"AIMC":[92],"chip":[93],"designs":[94],"investigate":[96],"device-level":[97],"noise":[98],"behavior.":[99],"Overall,":[100],"this":[101],"brief":[102],"presents":[103],"versatile":[105],"DNN":[106],"acceleration":[108],"adaptable":[110],"various":[112],"models":[113],"future":[115],"FPGA":[116],"designs.":[117]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":1}],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2025-10-10T00:00:00"}
