{"id":"https://openalex.org/W4413887435","doi":"https://doi.org/10.1109/tc.2025.3603692","title":"TeraPool: A Physical Design Aware, 1024 RISC-V Cores Shared-L1-Memory Scaled-Up Cluster Design With High Bandwidth Main Memory Link","display_name":"TeraPool: A Physical Design Aware, 1024 RISC-V Cores Shared-L1-Memory Scaled-Up Cluster Design With High Bandwidth Main Memory Link","publication_year":2025,"publication_date":"2025-09-01","ids":{"openalex":"https://openalex.org/W4413887435","doi":"https://doi.org/10.1109/tc.2025.3603692"},"language":"en","primary_location":{"id":"doi:10.1109/tc.2025.3603692","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tc.2025.3603692","pdf_url":null,"source":{"id":"https://openalex.org/S157670870","display_name":"IEEE Transactions on Computers","issn_l":"0018-9340","issn":["0018-9340","1557-9956","2326-3814"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Computers","raw_type":"journal-article"},"type":"article","indexed_in":["arxiv","crossref"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/2603.01629","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5100625410","display_name":"Yichao Zhang","orcid":"https://orcid.org/0009-0008-7508-599X"},"institutions":[{"id":"https://openalex.org/I35440088","display_name":"ETH Zurich","ror":"https://ror.org/05a28rw58","country_code":"CH","type":"education","lineage":["https://openalex.org/I2799323385","https://openalex.org/I35440088"]}],"countries":["CH"],"is_corresponding":true,"raw_author_name":"Yichao Zhang","raw_affiliation_strings":["Integrated Systems Laboratory (IIS), ETH Zurich, Zurich, Switzerland"],"affiliations":[{"raw_affiliation_string":"Integrated Systems Laboratory (IIS), ETH Zurich, Zurich, Switzerland","institution_ids":["https://openalex.org/I35440088"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5020963902","display_name":"Marco Bertuletti","orcid":"https://orcid.org/0000-0001-7576-0803"},"institutions":[{"id":"https://openalex.org/I35440088","display_name":"ETH Zurich","ror":"https://ror.org/05a28rw58","country_code":"CH","type":"education","lineage":["https://openalex.org/I2799323385","https://openalex.org/I35440088"]}],"countries":["CH"],"is_corresponding":false,"raw_author_name":"Marco Bertuletti","raw_affiliation_strings":["Integrated Systems Laboratory (IIS), ETH Zurich, Zurich, Switzerland"],"affiliations":[{"raw_affiliation_string":"Integrated Systems Laboratory (IIS), ETH Zurich, Zurich, Switzerland","institution_ids":["https://openalex.org/I35440088"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100458165","display_name":"Chi Zhang","orcid":"https://orcid.org/0000-0002-2503-857X"},"institutions":[{"id":"https://openalex.org/I35440088","display_name":"ETH Zurich","ror":"https://ror.org/05a28rw58","country_code":"CH","type":"education","lineage":["https://openalex.org/I2799323385","https://openalex.org/I35440088"]}],"countries":["CH"],"is_corresponding":false,"raw_author_name":"Chi Zhang","raw_affiliation_strings":["Integrated Systems Laboratory (IIS), ETH Zurich, Zurich, Switzerland"],"affiliations":[{"raw_affiliation_string":"Integrated Systems Laboratory (IIS), ETH Zurich, Zurich, Switzerland","institution_ids":["https://openalex.org/I35440088"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5025031387","display_name":"Samuel Riedel","orcid":"https://orcid.org/0000-0002-5772-6377"},"institutions":[{"id":"https://openalex.org/I35440088","display_name":"ETH Zurich","ror":"https://ror.org/05a28rw58","country_code":"CH","type":"education","lineage":["https://openalex.org/I2799323385","https://openalex.org/I35440088"]}],"countries":["CH"],"is_corresponding":false,"raw_author_name":"Samuel Riedel","raw_affiliation_strings":["Integrated Systems Laboratory (IIS), ETH Zurich, Zurich, Switzerland"],"affiliations":[{"raw_affiliation_string":"Integrated Systems Laboratory (IIS), ETH Zurich, Zurich, Switzerland","institution_ids":["https://openalex.org/I35440088"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5057255049","display_name":"Dejian Shen","orcid":"https://orcid.org/0000-0002-0283-6835"},"institutions":[{"id":"https://openalex.org/I35440088","display_name":"ETH Zurich","ror":"https://ror.org/05a28rw58","country_code":"CH","type":"education","lineage":["https://openalex.org/I2799323385","https://openalex.org/I35440088"]}],"countries":["CH"],"is_corresponding":false,"raw_author_name":"Diyou Shen","raw_affiliation_strings":["Integrated Systems Laboratory (IIS), ETH Zurich, Zurich, Switzerland"],"affiliations":[{"raw_affiliation_string":"Integrated Systems Laboratory (IIS), ETH Zurich, Zurich, Switzerland","institution_ids":["https://openalex.org/I35440088"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100412567","display_name":"Bowen Wang","orcid":"https://orcid.org/0009-0006-7321-7456"},"institutions":[{"id":"https://openalex.org/I35440088","display_name":"ETH Zurich","ror":"https://ror.org/05a28rw58","country_code":"CH","type":"education","lineage":["https://openalex.org/I2799323385","https://openalex.org/I35440088"]}],"countries":["CH"],"is_corresponding":false,"raw_author_name":"Bowen Wang","raw_affiliation_strings":["Integrated Systems Laboratory (IIS), ETH Zurich, Zurich, Switzerland"],"affiliations":[{"raw_affiliation_string":"Integrated Systems Laboratory (IIS), ETH Zurich, Zurich, Switzerland","institution_ids":["https://openalex.org/I35440088"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5003927271","display_name":"Alessandro Vanelli\u2010Coralli","orcid":"https://orcid.org/0000-0002-4475-5718"},"institutions":[{"id":"https://openalex.org/I202697423","display_name":"University of Zurich","ror":"https://ror.org/02crff812","country_code":"CH","type":"education","lineage":["https://openalex.org/I202697423"]},{"id":"https://openalex.org/I35440088","display_name":"ETH Zurich","ror":"https://ror.org/05a28rw58","country_code":"CH","type":"education","lineage":["https://openalex.org/I2799323385","https://openalex.org/I35440088"]}],"countries":["CH"],"is_corresponding":false,"raw_author_name":"Alessandro Vanelli-Coralli","raw_affiliation_strings":["Integrated Systems Laboratory (IIS), ETH Zurich, Zurich, Switzerland","IISETH Zurich"],"affiliations":[{"raw_affiliation_string":"Integrated Systems Laboratory (IIS), ETH Zurich, Zurich, Switzerland","institution_ids":["https://openalex.org/I35440088"]},{"raw_affiliation_string":"IISETH Zurich","institution_ids":["https://openalex.org/I202697423"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5043408422","display_name":"Luca Benini","orcid":"https://orcid.org/0000-0001-8068-3806"},"institutions":[{"id":"https://openalex.org/I202697423","display_name":"University of Zurich","ror":"https://ror.org/02crff812","country_code":"CH","type":"education","lineage":["https://openalex.org/I202697423"]},{"id":"https://openalex.org/I35440088","display_name":"ETH Zurich","ror":"https://ror.org/05a28rw58","country_code":"CH","type":"education","lineage":["https://openalex.org/I2799323385","https://openalex.org/I35440088"]}],"countries":["CH"],"is_corresponding":false,"raw_author_name":"Luca Benini","raw_affiliation_strings":["Integrated Systems Laboratory (IIS), ETH Zurich, Zurich, Switzerland","IISETH Zurich"],"affiliations":[{"raw_affiliation_string":"Integrated Systems Laboratory (IIS), ETH Zurich, Zurich, Switzerland","institution_ids":["https://openalex.org/I35440088"]},{"raw_affiliation_string":"IISETH Zurich","institution_ids":["https://openalex.org/I202697423"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":8,"corresponding_author_ids":["https://openalex.org/A5100625410"],"corresponding_institution_ids":["https://openalex.org/I35440088"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.26079683,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":"74","issue":"11","first_page":"3667","last_page":"3681"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10829","display_name":"Interconnection Networks and Systems","score":0.9865999817848206,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10829","display_name":"Interconnection Networks and Systems","score":0.9865999817848206,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.958299994468689,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10472","display_name":"Semiconductor materials and devices","score":0.9562000036239624,"subfield":{"id":"https://openalex.org/subfields/2208","display_name":"Electrical and Electronic Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7232351899147034},{"id":"https://openalex.org/keywords/memory-bandwidth","display_name":"Memory bandwidth","score":0.5137468576431274},{"id":"https://openalex.org/keywords/bandwidth","display_name":"Bandwidth (computing)","score":0.49307748675346375},{"id":"https://openalex.org/keywords/cluster","display_name":"Cluster (spacecraft)","score":0.45919832587242126},{"id":"https://openalex.org/keywords/reduced-instruction-set-computing","display_name":"Reduced instruction set computing","score":0.45121458172798157},{"id":"https://openalex.org/keywords/computer-architecture","display_name":"Computer architecture","score":0.4219835102558136},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.3303646445274353},{"id":"https://openalex.org/keywords/instruction-set","display_name":"Instruction set","score":0.2266772985458374},{"id":"https://openalex.org/keywords/operating-system","display_name":"Operating system","score":0.1910356879234314},{"id":"https://openalex.org/keywords/computer-network","display_name":"Computer network","score":0.15526902675628662}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7232351899147034},{"id":"https://openalex.org/C188045654","wikidata":"https://www.wikidata.org/wiki/Q17148339","display_name":"Memory bandwidth","level":2,"score":0.5137468576431274},{"id":"https://openalex.org/C2776257435","wikidata":"https://www.wikidata.org/wiki/Q1576430","display_name":"Bandwidth (computing)","level":2,"score":0.49307748675346375},{"id":"https://openalex.org/C164866538","wikidata":"https://www.wikidata.org/wiki/Q367351","display_name":"Cluster (spacecraft)","level":2,"score":0.45919832587242126},{"id":"https://openalex.org/C126298526","wikidata":"https://www.wikidata.org/wiki/Q189376","display_name":"Reduced instruction set computing","level":3,"score":0.45121458172798157},{"id":"https://openalex.org/C118524514","wikidata":"https://www.wikidata.org/wiki/Q173212","display_name":"Computer architecture","level":1,"score":0.4219835102558136},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.3303646445274353},{"id":"https://openalex.org/C202491316","wikidata":"https://www.wikidata.org/wiki/Q272683","display_name":"Instruction set","level":2,"score":0.2266772985458374},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.1910356879234314},{"id":"https://openalex.org/C31258907","wikidata":"https://www.wikidata.org/wiki/Q1301371","display_name":"Computer network","level":1,"score":0.15526902675628662}],"mesh":[],"locations_count":3,"locations":[{"id":"doi:10.1109/tc.2025.3603692","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tc.2025.3603692","pdf_url":null,"source":{"id":"https://openalex.org/S157670870","display_name":"IEEE Transactions on Computers","issn_l":"0018-9340","issn":["0018-9340","1557-9956","2326-3814"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Computers","raw_type":"journal-article"},{"id":"pmh:oai:arXiv.org:2603.01629","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2603.01629","pdf_url":"https://arxiv.org/pdf/2603.01629","source":{"id":"https://openalex.org/S4393918464","display_name":"ArXiv.org","issn_l":"2331-8422","issn":["2331-8422"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by-nc-sa","license_id":"https://openalex.org/licenses/cc-by-nc-sa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},{"id":"pmh:oai:cris.unibo.it:11585/1039409","is_oa":false,"landing_page_url":"https://hdl.handle.net/11585/1039409","pdf_url":null,"source":{"id":"https://openalex.org/S4306402579","display_name":"Archivio istituzionale della ricerca (Alma Mater Studiorum Universit\u00e0 di Bologna)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I4210117483","host_organization_name":"Istituto di Ematologia di Bologna","host_organization_lineage":["https://openalex.org/I4210117483"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"info:eu-repo/semantics/article"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:2603.01629","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2603.01629","pdf_url":"https://arxiv.org/pdf/2603.01629","source":{"id":"https://openalex.org/S4393918464","display_name":"ArXiv.org","issn_l":"2331-8422","issn":["2331-8422"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by-nc-sa","license_id":"https://openalex.org/licenses/cc-by-nc-sa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":31,"referenced_works":["https://openalex.org/W1978236050","https://openalex.org/W1982063824","https://openalex.org/W1999085092","https://openalex.org/W2009872315","https://openalex.org/W2026070376","https://openalex.org/W2038454316","https://openalex.org/W2045886945","https://openalex.org/W2461193710","https://openalex.org/W2504211376","https://openalex.org/W2612168417","https://openalex.org/W2769815320","https://openalex.org/W2965653519","https://openalex.org/W2967335569","https://openalex.org/W3043263328","https://openalex.org/W3095108256","https://openalex.org/W3119921680","https://openalex.org/W3135242540","https://openalex.org/W3211226062","https://openalex.org/W4206371601","https://openalex.org/W4221013715","https://openalex.org/W4293149165","https://openalex.org/W4312801611","https://openalex.org/W4312968147","https://openalex.org/W4386090486","https://openalex.org/W4388469806","https://openalex.org/W4391944268","https://openalex.org/W4396816723","https://openalex.org/W4401211850","https://openalex.org/W4404918643","https://openalex.org/W4404954401","https://openalex.org/W4406983056"],"related_works":["https://openalex.org/W2135839484","https://openalex.org/W2035206467","https://openalex.org/W2042824850","https://openalex.org/W4413068361","https://openalex.org/W2512308948","https://openalex.org/W2068921804","https://openalex.org/W2984139344","https://openalex.org/W2077105843","https://openalex.org/W2770465587","https://openalex.org/W2086716781"],"abstract_inverted_index":{"Shared":[0],"L1-memory":[1],"clusters":[2,36,240],"of":[3,35,81,158,175,185,213,223,237],"streamlined":[4],"instruction":[5],"processors":[6],"(processing":[7],"elements":[8],"-":[9],"PEs)":[10],"are":[11],"commonly":[12],"used":[13],"as":[14],"building":[15],"blocks":[16],"in":[17,53,129,217,242],"modern,":[18],"massively":[19],"parallel":[20],"computing":[21],"architectures":[22,30],"(e.g.":[23],"GP-GPUs).":[24],"<italic":[25,66],"xmlns:mml=\"http://www.w3.org/1998/Math/MathML\"":[26,67],"xmlns:xlink=\"http://www.w3.org/1999/xlink\">Scaling":[27,68],"out</i>":[28],"these":[29],"by":[31,43],"increasing":[32],"the":[33,44,62,70,79,156,176,182,192,221,234,238],"number":[34],"incurs":[37],"computational":[38],"and":[39,48,55,75,203],"power":[40],"overhead,":[41],"caused":[42],"requirement":[45],"to":[46,170,196,205,228],"split":[47],"merge":[49],"large":[50],"data":[51,172],"structures":[52],"chunks":[54,57],"move":[56],"across":[58],"memory":[59,115,151,166],"hierarchies":[60],"via":[61,116],"high-latency":[63],"global":[64],"interconnect.":[65],"up</i>":[69],"cluster":[71,108,193,227],"reduces":[72],"buffering,":[73],"copy,":[74],"synchronization":[76],"overheads.":[77],"However,":[78],"complexity":[80],"a":[82,92,100,111,117,159,210,225,229],"fully":[83],"connected":[84],"cores-to-L1-memory":[85],"crossbar":[86],"grows":[87],"quadratically":[88],"with":[89],"PE-count,":[90],"posing":[91],"major":[93],"physical":[94],"implementation":[95],"challenge.":[96],"We":[97],"present":[98],"TeraPool,":[99],"physically":[101],"implementable,":[102],">1000":[103],"floating-point-capable":[104],"RISC-V":[105],"PEs":[106],"scaled-up":[107],"design,":[109],"sharing":[110],"Multi-MegaByte":[112],">4000-banked":[113],"L1":[114],"low":[118],"latency":[119],"hierarchical":[120,143],"interconnect":[121,145],"(1-7/9/11":[122],"cycles,":[123],"depending":[124],"on":[125,215],"target":[126],"frequency).":[127],"Implemented":[128],"12nm":[130],"FinFET":[131],"technology,":[132],"TeraPool":[133],"achieves":[134],"near-gigahertz":[135],"frequencies":[136],"(910MHz)":[137],"typical,":[138],"0.80V/25":[139],"\u00b0C.":[140],"The":[141],"energy-efficient":[142],"PE-to-L1-memory":[144],"consumes":[146],"only":[147],"9-13.5":[148],"pJ":[149],"for":[150],"bank":[152],"accesses,":[153],"just":[154],"0.74-1.1\u00d7":[155],"cost":[157],"FP32":[160],"FMA.":[161],"A":[162],"high":[163,211],"bandwidth":[164,184],"main":[165,188],"link":[167],"is":[168],"designed":[169],"manage":[171],"transfers":[173,180],"in/out":[174],"shared":[177],"L1,":[178],"sustaining":[179],"at":[181],"full":[183],"an":[186],"HBM2E":[187],"memory.":[189],"At":[190],"910MHz,":[191],"delivers":[194],"up":[195,204],"1.89":[197],"single":[198],"precision":[199],"TFLOP/s":[200],"peak":[201],"performance":[202],"200GFLOP/s/W":[206],"energy":[207],"efficiency":[208],"(at":[209],"IPC/PE":[212],"0.8":[214],"average)":[216],"benchmark":[218],"kernels,":[219],"demonstrating":[220],"feasibility":[222],"scaling":[224],"shared-L1":[226],"thousand":[230],"PEs,":[231],"four":[232],"times":[233],"PE":[235],"count":[236],"largest":[239],"reported":[241],"literature.":[243]},"counts_by_year":[],"updated_date":"2026-03-27T05:58:40.876381","created_date":"2025-10-10T00:00:00"}
