{"id":"https://openalex.org/W7133507254","doi":"https://doi.org/10.1109/hpca68181.2026.11408490","title":"RPU \u2013 A Reasoning Processing Unit","display_name":"RPU \u2013 A Reasoning Processing Unit","publication_year":2026,"publication_date":"2026-01-31","ids":{"openalex":"https://openalex.org/W7133507254","doi":"https://doi.org/10.1109/hpca68181.2026.11408490"},"language":null,"primary_location":{"id":"doi:10.1109/hpca68181.2026.11408490","is_oa":false,"landing_page_url":"https://doi.org/10.1109/hpca68181.2026.11408490","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2026 IEEE International Symposium on High Performance Computer Architecture (HPCA)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5007527025","display_name":"Matthew Joseph Adiletta","orcid":null},"institutions":[{"id":"https://openalex.org/I2801851002","display_name":"Harvard University Press","ror":"https://ror.org/006v7bf86","country_code":"US","type":"other","lineage":["https://openalex.org/I136199984","https://openalex.org/I2801851002"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Matthew Joseph Adiletta","raw_affiliation_strings":["Harvard University"],"affiliations":[{"raw_affiliation_string":"Harvard University","institution_ids":["https://openalex.org/I2801851002"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5128190865","display_name":"Gu-Yeon Wei","orcid":null},"institutions":[{"id":"https://openalex.org/I2801851002","display_name":"Harvard University Press","ror":"https://ror.org/006v7bf86","country_code":"US","type":"other","lineage":["https://openalex.org/I136199984","https://openalex.org/I2801851002"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Gu-Yeon Wei","raw_affiliation_strings":["Harvard University"],"affiliations":[{"raw_affiliation_string":"Harvard University","institution_ids":["https://openalex.org/I2801851002"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5107309139","display_name":"David R. Brooks","orcid":null},"institutions":[{"id":"https://openalex.org/I2801851002","display_name":"Harvard University Press","ror":"https://ror.org/006v7bf86","country_code":"US","type":"other","lineage":["https://openalex.org/I136199984","https://openalex.org/I2801851002"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"David Brooks","raw_affiliation_strings":["Harvard University"],"affiliations":[{"raw_affiliation_string":"Harvard University","institution_ids":["https://openalex.org/I2801851002"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5007527025"],"corresponding_institution_ids":["https://openalex.org/I2801851002"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.92753623,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"17"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10906","display_name":"AI-based Problem Solving and Planning","score":0.3167000114917755,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10906","display_name":"AI-based Problem Solving and Planning","score":0.3167000114917755,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11010","display_name":"Logic, Reasoning, and Knowledge","score":0.23759999871253967,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11596","display_name":"Constraint Satisfaction and Optimization","score":0.06289999932050705,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/scalability","display_name":"Scalability","score":0.7271999716758728},{"id":"https://openalex.org/keywords/memory-bandwidth","display_name":"Memory bandwidth","score":0.5343000292778015},{"id":"https://openalex.org/keywords/provisioning","display_name":"Provisioning","score":0.526199996471405},{"id":"https://openalex.org/keywords/microarchitecture","display_name":"Microarchitecture","score":0.4683000147342682},{"id":"https://openalex.org/keywords/architecture","display_name":"Architecture","score":0.4641000032424927},{"id":"https://openalex.org/keywords/inference-engine","display_name":"Inference engine","score":0.42879998683929443},{"id":"https://openalex.org/keywords/latency","display_name":"Latency (audio)","score":0.4049000144004822},{"id":"https://openalex.org/keywords/efficient-energy-use","display_name":"Efficient energy use","score":0.3815000057220459},{"id":"https://openalex.org/keywords/throughput","display_name":"Throughput","score":0.37700000405311584}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7714999914169312},{"id":"https://openalex.org/C48044578","wikidata":"https://www.wikidata.org/wiki/Q727490","display_name":"Scalability","level":2,"score":0.7271999716758728},{"id":"https://openalex.org/C118524514","wikidata":"https://www.wikidata.org/wiki/Q173212","display_name":"Computer architecture","level":1,"score":0.630299985408783},{"id":"https://openalex.org/C188045654","wikidata":"https://www.wikidata.org/wiki/Q17148339","display_name":"Memory bandwidth","level":2,"score":0.5343000292778015},{"id":"https://openalex.org/C172191483","wikidata":"https://www.wikidata.org/wiki/Q1071806","display_name":"Provisioning","level":2,"score":0.526199996471405},{"id":"https://openalex.org/C107598950","wikidata":"https://www.wikidata.org/wiki/Q259864","display_name":"Microarchitecture","level":2,"score":0.4683000147342682},{"id":"https://openalex.org/C123657996","wikidata":"https://www.wikidata.org/wiki/Q12271","display_name":"Architecture","level":2,"score":0.4641000032424927},{"id":"https://openalex.org/C46743427","wikidata":"https://www.wikidata.org/wiki/Q1341685","display_name":"Inference engine","level":3,"score":0.42879998683929443},{"id":"https://openalex.org/C82876162","wikidata":"https://www.wikidata.org/wiki/Q17096504","display_name":"Latency (audio)","level":2,"score":0.4049000144004822},{"id":"https://openalex.org/C149635348","wikidata":"https://www.wikidata.org/wiki/Q193040","display_name":"Embedded system","level":1,"score":0.38589999079704285},{"id":"https://openalex.org/C2742236","wikidata":"https://www.wikidata.org/wiki/Q924713","display_name":"Efficient energy use","level":2,"score":0.3815000057220459},{"id":"https://openalex.org/C157764524","wikidata":"https://www.wikidata.org/wiki/Q1383412","display_name":"Throughput","level":3,"score":0.37700000405311584},{"id":"https://openalex.org/C2776214188","wikidata":"https://www.wikidata.org/wiki/Q408386","display_name":"Inference","level":2,"score":0.37310001254081726},{"id":"https://openalex.org/C2776257435","wikidata":"https://www.wikidata.org/wiki/Q1576430","display_name":"Bandwidth (computing)","level":2,"score":0.364300012588501},{"id":"https://openalex.org/C82687282","wikidata":"https://www.wikidata.org/wiki/Q66221","display_name":"Auxiliary memory","level":2,"score":0.3386000096797943},{"id":"https://openalex.org/C74912251","wikidata":"https://www.wikidata.org/wiki/Q6815727","display_name":"Memory footprint","level":2,"score":0.335999995470047},{"id":"https://openalex.org/C2779602883","wikidata":"https://www.wikidata.org/wiki/Q15544750","display_name":"Memory architecture","level":2,"score":0.33219999074935913},{"id":"https://openalex.org/C98025372","wikidata":"https://www.wikidata.org/wiki/Q477538","display_name":"Systems architecture","level":3,"score":0.3248000144958496},{"id":"https://openalex.org/C2779581428","wikidata":"https://www.wikidata.org/wiki/Q7122997","display_name":"Packet processing","level":3,"score":0.3188999891281128},{"id":"https://openalex.org/C177950962","wikidata":"https://www.wikidata.org/wiki/Q10997658","display_name":"Non-volatile memory","level":2,"score":0.31790000200271606},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.31679999828338623},{"id":"https://openalex.org/C3720319","wikidata":"https://www.wikidata.org/wiki/Q5015937","display_name":"Cache-only memory architecture","level":5,"score":0.2944999933242798},{"id":"https://openalex.org/C162262903","wikidata":"https://www.wikidata.org/wiki/Q343527","display_name":"Allocator","level":2,"score":0.2888000011444092},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.28700000047683716},{"id":"https://openalex.org/C9390403","wikidata":"https://www.wikidata.org/wiki/Q3966","display_name":"Computer hardware","level":1,"score":0.27889999747276306},{"id":"https://openalex.org/C175309249","wikidata":"https://www.wikidata.org/wiki/Q725864","display_name":"Pipeline transport","level":2,"score":0.26499998569488525},{"id":"https://openalex.org/C120314980","wikidata":"https://www.wikidata.org/wiki/Q180634","display_name":"Distributed computing","level":1,"score":0.2599000036716461},{"id":"https://openalex.org/C202491316","wikidata":"https://www.wikidata.org/wiki/Q272683","display_name":"Instruction set","level":2,"score":0.25839999318122864},{"id":"https://openalex.org/C176649486","wikidata":"https://www.wikidata.org/wiki/Q2308807","display_name":"Memory management","level":3,"score":0.2578999996185303}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/hpca68181.2026.11408490","is_oa":false,"landing_page_url":"https://doi.org/10.1109/hpca68181.2026.11408490","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2026 IEEE International Symposium on High Performance Computer Architecture (HPCA)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":18,"referenced_works":["https://openalex.org/W2100430002","https://openalex.org/W2899956174","https://openalex.org/W2946072469","https://openalex.org/W2980104813","https://openalex.org/W3207622241","https://openalex.org/W4245923077","https://openalex.org/W4281654814","https://openalex.org/W4312060029","https://openalex.org/W4324292875","https://openalex.org/W4380874652","https://openalex.org/W4380874786","https://openalex.org/W4385834084","https://openalex.org/W4387321091","https://openalex.org/W4391594310","https://openalex.org/W4394998968","https://openalex.org/W4401211704","https://openalex.org/W4408029577","https://openalex.org/W4411092069"],"related_works":[],"abstract_inverted_index":{"Large":[0],"language":[1],"model":[2],"(LLM)":[3],"inference":[4,66],"performance":[5,26],"is":[6,34],"increasingly":[7],"bottlenecked":[8],"by":[9,36],"the":[10,69,86,97,100],"memory":[11,28,55,78,102],"wall.":[12,103],"While":[13],"GPUs":[14],"continue":[15],"to":[16,23,95,145,157],"scale":[17],"raw":[18],"compute":[19],"throughput,":[20],"they":[21],"struggle":[22],"deliver":[24],"scalable":[25,77,122],"for":[27,71,76,115],"bandwidth":[29,148],"bound":[30],"workloads.":[31],"This":[32],"challenge":[33],"amplified":[35],"emerging":[37],"reasoning":[38],"LLM":[39],"applications,":[40],"where":[41],"long":[42],"output":[43],"sequences,":[44],"low":[45],"arithmetic":[46],"intensity,":[47],"and":[48,63,118,129,133,142,164],"tight":[49],"latency":[50,163],"constraints":[51],"demand":[52],"significantly":[53],"higher":[54,169],"bandwidth.":[56,79],"As":[57],"a":[58,91,121,126,135],"result,":[59],"system":[60,74,174],"utilization":[61],"drops":[62],"energy":[64,117],"per":[65],"rises,":[67],"highlighting":[68],"need":[70],"an":[72,172],"optimized":[73],"architecture":[75,93,124],"To":[80],"address":[81,96],"these":[82],"challenges":[83,98],"we":[84],"present":[85],"Reasoning":[87],"Processing":[88],"Unit":[89],"(RPU),":[90],"chiplet-based":[92],"designed":[94],"of":[99],"modern":[101],"RPU":[104,154],"introduces:":[105],"(1)":[106],"A":[107],"Capacity-Optimized":[108],"High-Bandwidth":[109],"Memory":[110],"(HBM-CO)":[111],"that":[112,138,153],"trades":[113],"capacity":[114],"lower":[116,162],"cost;":[119],"(2)":[120],"chiplet":[123],"featuring":[125],"bandwidth-first":[127],"power":[128],"area":[130],"provisioning":[131],"design;":[132],"(3)":[134],"decoupled":[136],"microarchitecture":[137],"separates":[139],"memory,":[140],"compute,":[141],"communication":[143],"pipelines":[144],"sustain":[146],"high":[147],"utilization.":[149],"Simulation":[150],"results":[151],"show":[152],"performs":[155],"up":[156],"<tex":[158,165],"xmlns:mml=\"http://www.w3.org/1998/Math/MathML\"":[159,166],"xmlns:xlink=\"http://www.w3.org/1999/xlink\">$45.3":[160],"\\times$</tex>":[161,168],"xmlns:xlink=\"http://www.w3.org/1999/xlink\">$18.6":[167],"throughput":[170],"over":[171],"H100":[173],"at":[175],"ISO-TDP":[176],"on":[177],"Llama3-405B.":[178]},"counts_by_year":[],"updated_date":"2026-03-06T06:45:51.903784","created_date":"2026-02-27T00:00:00"}
