{"id":"https://openalex.org/W6903240423","doi":"https://doi.org/10.1145/3725843.3756121","title":"Pimba: A Processing-in-Memory Acceleration for Post-Transformer Large Language Model Serving","display_name":"Pimba: A Processing-in-Memory Acceleration for Post-Transformer Large Language Model Serving","publication_year":2025,"publication_date":"2025-10-17","ids":{"openalex":"https://openalex.org/W6903240423","doi":"https://doi.org/10.1145/3725843.3756121"},"language":"en","primary_location":{"id":"doi:10.1145/3725843.3756121","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3725843.3756121","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 58th IEEE/ACM International Symposium on Microarchitecture","raw_type":"proceedings-article"},"type":"article","indexed_in":["arxiv","crossref"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/2507.10178","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":null,"display_name":"Wonung Kim","orcid":"https://orcid.org/0009-0000-1180-0503"},"institutions":[{"id":"https://openalex.org/I157485424","display_name":"Korea Advanced Institute of Science and Technology","ror":"https://ror.org/05apxxy63","country_code":"KR","type":"education","lineage":["https://openalex.org/I157485424"]}],"countries":["KR"],"is_corresponding":false,"raw_author_name":"Wonung Kim","raw_affiliation_strings":["KAIST, Daejeon, Republic of Korea"],"raw_orcid":"https://orcid.org/0009-0000-1180-0503","affiliations":[{"raw_affiliation_string":"KAIST, Daejeon, Republic of Korea","institution_ids":["https://openalex.org/I157485424"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Yubin Lee","orcid":"https://orcid.org/0009-0002-6541-6739"},"institutions":[{"id":"https://openalex.org/I157485424","display_name":"Korea Advanced Institute of Science and Technology","ror":"https://ror.org/05apxxy63","country_code":"KR","type":"education","lineage":["https://openalex.org/I157485424"]}],"countries":["KR"],"is_corresponding":false,"raw_author_name":"Yubin Lee","raw_affiliation_strings":["KAIST, Daejeon, Republic of Korea"],"raw_orcid":"https://orcid.org/0009-0002-6541-6739","affiliations":[{"raw_affiliation_string":"KAIST, Daejeon, Republic of Korea","institution_ids":["https://openalex.org/I157485424"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Yoonsung Kim","orcid":"https://orcid.org/0009-0000-2333-292X"},"institutions":[{"id":"https://openalex.org/I157485424","display_name":"Korea Advanced Institute of Science and Technology","ror":"https://ror.org/05apxxy63","country_code":"KR","type":"education","lineage":["https://openalex.org/I157485424"]}],"countries":["KR"],"is_corresponding":false,"raw_author_name":"Yoonsung Kim","raw_affiliation_strings":["KAIST, Daejeon, Republic of Korea"],"raw_orcid":"https://orcid.org/0009-0000-2333-292X","affiliations":[{"raw_affiliation_string":"KAIST, Daejeon, Republic of Korea","institution_ids":["https://openalex.org/I157485424"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Jinwoo Hwang","orcid":"https://orcid.org/0009-0008-8498-2502"},"institutions":[{"id":"https://openalex.org/I157485424","display_name":"Korea Advanced Institute of Science and Technology","ror":"https://ror.org/05apxxy63","country_code":"KR","type":"education","lineage":["https://openalex.org/I157485424"]}],"countries":["KR"],"is_corresponding":false,"raw_author_name":"Jinwoo Hwang","raw_affiliation_strings":["KAIST, Daejeon, Republic of Korea"],"raw_orcid":"https://orcid.org/0009-0008-8498-2502","affiliations":[{"raw_affiliation_string":"KAIST, Daejeon, Republic of Korea","institution_ids":["https://openalex.org/I157485424"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Seongryong Oh","orcid":"https://orcid.org/0009-0004-6707-0641"},"institutions":[{"id":"https://openalex.org/I157485424","display_name":"Korea Advanced Institute of Science and Technology","ror":"https://ror.org/05apxxy63","country_code":"KR","type":"education","lineage":["https://openalex.org/I157485424"]}],"countries":["KR"],"is_corresponding":false,"raw_author_name":"Seongryong Oh","raw_affiliation_strings":["KAIST, Daejeon, Republic of Korea"],"raw_orcid":"https://orcid.org/0009-0004-6707-0641","affiliations":[{"raw_affiliation_string":"KAIST, Daejeon, Republic of Korea","institution_ids":["https://openalex.org/I157485424"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Jiyong Jung","orcid":"https://orcid.org/0009-0007-0420-179X"},"institutions":[{"id":"https://openalex.org/I157485424","display_name":"Korea Advanced Institute of Science and Technology","ror":"https://ror.org/05apxxy63","country_code":"KR","type":"education","lineage":["https://openalex.org/I157485424"]}],"countries":["KR"],"is_corresponding":false,"raw_author_name":"Jiyong Jung","raw_affiliation_strings":["KAIST, Daejeon, Republic of Korea"],"raw_orcid":"https://orcid.org/0009-0007-0420-179X","affiliations":[{"raw_affiliation_string":"KAIST, Daejeon, Republic of Korea","institution_ids":["https://openalex.org/I157485424"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Aziz Huseynov","orcid":"https://orcid.org/0009-0009-0516-0605"},"institutions":[{"id":"https://openalex.org/I157485424","display_name":"Korea Advanced Institute of Science and Technology","ror":"https://ror.org/05apxxy63","country_code":"KR","type":"education","lineage":["https://openalex.org/I157485424"]}],"countries":["KR"],"is_corresponding":false,"raw_author_name":"Aziz Huseynov","raw_affiliation_strings":["KAIST, Daejeon, Republic of Korea"],"raw_orcid":"https://orcid.org/0009-0009-0516-0605","affiliations":[{"raw_affiliation_string":"KAIST, Daejeon, Republic of Korea","institution_ids":["https://openalex.org/I157485424"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Woong Gyu Park","orcid":"https://orcid.org/0009-0002-4106-8039"},"institutions":[{"id":"https://openalex.org/I157485424","display_name":"Korea Advanced Institute of Science and Technology","ror":"https://ror.org/05apxxy63","country_code":"KR","type":"education","lineage":["https://openalex.org/I157485424"]}],"countries":["KR"],"is_corresponding":false,"raw_author_name":"Woong Gyu Park","raw_affiliation_strings":["KAIST, Daejeon, Republic of Korea"],"raw_orcid":"https://orcid.org/0009-0002-4106-8039","affiliations":[{"raw_affiliation_string":"KAIST, Daejeon, Republic of Korea","institution_ids":["https://openalex.org/I157485424"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Chang Hyun Park","orcid":"https://orcid.org/0000-0002-8250-8574"},"institutions":[{"id":"https://openalex.org/I123387679","display_name":"Uppsala University","ror":"https://ror.org/048a87296","country_code":"SE","type":"education","lineage":["https://openalex.org/I123387679"]}],"countries":["SE"],"is_corresponding":false,"raw_author_name":"Chang Hyun Park","raw_affiliation_strings":["Uppsala University, Atlanta, GA, USA"],"raw_orcid":"https://orcid.org/0000-0002-8250-8574","affiliations":[{"raw_affiliation_string":"Uppsala University, Atlanta, GA, USA","institution_ids":["https://openalex.org/I123387679"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Divya Mahajan","orcid":"https://orcid.org/0009-0007-8184-0528"},"institutions":[{"id":"https://openalex.org/I130701444","display_name":"Georgia Institute of Technology","ror":"https://ror.org/01zkghx44","country_code":"US","type":"education","lineage":["https://openalex.org/I130701444"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Divya Mahajan","raw_affiliation_strings":["Georgia Institute of Technology, Uppsala, Sweden"],"raw_orcid":"https://orcid.org/0009-0007-8184-0528","affiliations":[{"raw_affiliation_string":"Georgia Institute of Technology, Uppsala, Sweden","institution_ids":["https://openalex.org/I130701444"]}]},{"author_position":"last","author":{"id":null,"display_name":"Jongse Park","orcid":"https://orcid.org/0000-0002-6629-449X"},"institutions":[{"id":"https://openalex.org/I157485424","display_name":"Korea Advanced Institute of Science and Technology","ror":"https://ror.org/05apxxy63","country_code":"KR","type":"education","lineage":["https://openalex.org/I157485424"]}],"countries":["KR"],"is_corresponding":false,"raw_author_name":"Jongse Park","raw_affiliation_strings":["KAIST, Daejeon, Republic of Korea"],"raw_orcid":"https://orcid.org/0000-0002-6629-449X","affiliations":[{"raw_affiliation_string":"KAIST, Daejeon, Republic of Korea","institution_ids":["https://openalex.org/I157485424"]}]}],"institutions":[],"countries_distinct_count":3,"institutions_distinct_count":11,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":true,"cited_by_count":1,"citation_normalized_percentile":{"value":0.20615925,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"292","last_page":"307"},"is_retracted":false,"is_paratext":false,"is_xpac":true,"primary_topic":{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.2502000033855438,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.2502000033855438,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.2142000049352646,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T14347","display_name":"Big Data and Digital Economy","score":0.06040000170469284,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/scalability","display_name":"Scalability","score":0.7264000177383423},{"id":"https://openalex.org/keywords/transformer","display_name":"Transformer","score":0.6147000193595886},{"id":"https://openalex.org/keywords/security-token","display_name":"Security token","score":0.5044999718666077},{"id":"https://openalex.org/keywords/inference","display_name":"Inference","score":0.49390000104904175},{"id":"https://openalex.org/keywords/language-model","display_name":"Language model","score":0.42410001158714294},{"id":"https://openalex.org/keywords/execution-model","display_name":"Execution model","score":0.3984000086784363},{"id":"https://openalex.org/keywords/adder","display_name":"Adder","score":0.39430001378059387},{"id":"https://openalex.org/keywords/multiplier","display_name":"Multiplier (economics)","score":0.35429999232292175},{"id":"https://openalex.org/keywords/inference-engine","display_name":"Inference engine","score":0.35370001196861267}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7594000101089478},{"id":"https://openalex.org/C48044578","wikidata":"https://www.wikidata.org/wiki/Q727490","display_name":"Scalability","level":2,"score":0.7264000177383423},{"id":"https://openalex.org/C66322947","wikidata":"https://www.wikidata.org/wiki/Q11658","display_name":"Transformer","level":3,"score":0.6147000193595886},{"id":"https://openalex.org/C48145219","wikidata":"https://www.wikidata.org/wiki/Q1335365","display_name":"Security token","level":2,"score":0.5044999718666077},{"id":"https://openalex.org/C2776214188","wikidata":"https://www.wikidata.org/wiki/Q408386","display_name":"Inference","level":2,"score":0.49390000104904175},{"id":"https://openalex.org/C137293760","wikidata":"https://www.wikidata.org/wiki/Q3621696","display_name":"Language model","level":2,"score":0.42410001158714294},{"id":"https://openalex.org/C2776834041","wikidata":"https://www.wikidata.org/wiki/Q25346349","display_name":"Execution model","level":2,"score":0.3984000086784363},{"id":"https://openalex.org/C113775141","wikidata":"https://www.wikidata.org/wiki/Q428691","display_name":"Computer engineering","level":1,"score":0.3959999978542328},{"id":"https://openalex.org/C164620267","wikidata":"https://www.wikidata.org/wiki/Q376953","display_name":"Adder","level":3,"score":0.39430001378059387},{"id":"https://openalex.org/C118524514","wikidata":"https://www.wikidata.org/wiki/Q173212","display_name":"Computer architecture","level":1,"score":0.38749998807907104},{"id":"https://openalex.org/C124584101","wikidata":"https://www.wikidata.org/wiki/Q1053266","display_name":"Multiplier (economics)","level":2,"score":0.35429999232292175},{"id":"https://openalex.org/C46743427","wikidata":"https://www.wikidata.org/wiki/Q1341685","display_name":"Inference engine","level":3,"score":0.35370001196861267},{"id":"https://openalex.org/C48103436","wikidata":"https://www.wikidata.org/wiki/Q599031","display_name":"State (computer science)","level":2,"score":0.34769999980926514},{"id":"https://openalex.org/C120314980","wikidata":"https://www.wikidata.org/wiki/Q180634","display_name":"Distributed computing","level":1,"score":0.34619998931884766},{"id":"https://openalex.org/C188045654","wikidata":"https://www.wikidata.org/wiki/Q17148339","display_name":"Memory bandwidth","level":2,"score":0.3100999891757965},{"id":"https://openalex.org/C44291984","wikidata":"https://www.wikidata.org/wiki/Q1074173","display_name":"Question answering","level":2,"score":0.30979999899864197},{"id":"https://openalex.org/C79403827","wikidata":"https://www.wikidata.org/wiki/Q3988","display_name":"Real-time computing","level":1,"score":0.3003000020980835},{"id":"https://openalex.org/C167822520","wikidata":"https://www.wikidata.org/wiki/Q176452","display_name":"Finite-state machine","level":2,"score":0.28610000014305115},{"id":"https://openalex.org/C12186640","wikidata":"https://www.wikidata.org/wiki/Q6815743","display_name":"Memory model","level":3,"score":0.28540000319480896},{"id":"https://openalex.org/C9390403","wikidata":"https://www.wikidata.org/wiki/Q3966","display_name":"Computer hardware","level":1,"score":0.28529998660087585},{"id":"https://openalex.org/C2780069185","wikidata":"https://www.wikidata.org/wiki/Q7977945","display_name":"Equivalence (formal languages)","level":2,"score":0.27900001406669617},{"id":"https://openalex.org/C13164978","wikidata":"https://www.wikidata.org/wiki/Q600158","display_name":"Hardware acceleration","level":3,"score":0.2768000066280365},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.2759999930858612},{"id":"https://openalex.org/C2777767291","wikidata":"https://www.wikidata.org/wiki/Q1080291","display_name":"Sizing","level":2,"score":0.2644999921321869},{"id":"https://openalex.org/C123657996","wikidata":"https://www.wikidata.org/wiki/Q12271","display_name":"Architecture","level":2,"score":0.2639999985694885},{"id":"https://openalex.org/C19275194","wikidata":"https://www.wikidata.org/wiki/Q222903","display_name":"Multiplexing","level":2,"score":0.262800008058548},{"id":"https://openalex.org/C72434380","wikidata":"https://www.wikidata.org/wiki/Q230930","display_name":"State space","level":2,"score":0.259799987077713}],"mesh":[],"locations_count":3,"locations":[{"id":"doi:10.1145/3725843.3756121","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3725843.3756121","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 58th IEEE/ACM International Symposium on Microarchitecture","raw_type":"proceedings-article"},{"id":"pmh:oai:arXiv.org:2507.10178","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2507.10178","pdf_url":"https://arxiv.org/pdf/2507.10178","source":{"id":"https://openalex.org/S4393918464","display_name":"ArXiv.org","issn_l":"2331-8422","issn":["2331-8422"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},{"id":"pmh:oai:DiVA.org:uu-574839","is_oa":true,"landing_page_url":"http://urn.kb.se/resolve?urn=urn:nbn:se:uu:diva-574839","pdf_url":"https://uu.diva-portal.org/smash/get/diva2:2026046/FULLTEXT01","source":{"id":"https://openalex.org/S4306401559","display_name":"KTH Publication Database DiVA (KTH Royal Institute of Technology)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:2507.10178","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2507.10178","pdf_url":"https://arxiv.org/pdf/2507.10178","source":{"id":"https://openalex.org/S4393918464","display_name":"ArXiv.org","issn_l":"2331-8422","issn":["2331-8422"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G4700831490","display_name":null,"funder_award_id":"2022-","funder_id":"https://openalex.org/F4320335489","funder_display_name":"Institute for Information and Communications Technology Promotion"},{"id":"https://openalex.org/G5906391025","display_name":null,"funder_award_id":"2022-0-01037","funder_id":"https://openalex.org/F4320335489","funder_display_name":"Institute for Information and Communications Technology Promotion"}],"funders":[{"id":"https://openalex.org/F4320307601","display_name":"Idaho Humanities Council","ror":"https://ror.org/02jjxqt79"},{"id":"https://openalex.org/F4320322093","display_name":"Electronics and Telecommunications Research Institute","ror":"https://ror.org/03ysstz10"},{"id":"https://openalex.org/F4320322202","display_name":"IC Design Education Center","ror":"https://ror.org/005v57z85"},{"id":"https://openalex.org/F4320328359","display_name":"Ministry of Science and ICT, South Korea","ror":"https://ror.org/01wpjm123"},{"id":"https://openalex.org/F4320335489","display_name":"Institute for Information and Communications Technology Promotion","ror":"https://ror.org/01g0hqq23"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":24,"referenced_works":["https://openalex.org/W2053987390","https://openalex.org/W2120000030","https://openalex.org/W2725159389","https://openalex.org/W2761132374","https://openalex.org/W2946609015","https://openalex.org/W2963015836","https://openalex.org/W2998617917","https://openalex.org/W3100710793","https://openalex.org/W3133253223","https://openalex.org/W3158634533","https://openalex.org/W3194676777","https://openalex.org/W3208633927","https://openalex.org/W4387321091","https://openalex.org/W4388757726","https://openalex.org/W4389524555","https://openalex.org/W4392427708","https://openalex.org/W4393407040","https://openalex.org/W4393407316","https://openalex.org/W4394998532","https://openalex.org/W4394998968","https://openalex.org/W4395073431","https://openalex.org/W4404848672","https://openalex.org/W4404955380","https://openalex.org/W4412888538"],"related_works":[],"abstract_inverted_index":{"Transformers":[0],"are":[1,105],"the":[2,13,38,92,161],"driving":[3],"force":[4],"behind":[5],"today's":[6],"Large":[7],"Language":[8],"Models":[9],"(LLMs),":[10],"serving":[11,72],"as":[12,46,62,160,171],"foundation":[14],"for":[15,33],"their":[16,21,101],"performance":[17,93],"and":[18,23,53,79,97,119,145,202,214,225,233],"versatility.":[19],"Yet,":[20],"compute":[22],"memory":[24,109],"costs":[25],"grow":[26],"with":[27],"sequence":[28],"length,":[29],"posing":[30],"scalability":[31],"challenges":[32],"long-context":[34],"inferencing.":[35],"In":[36],"response,":[37],"algorithm":[39],"community":[40],"is":[41],"exploring":[42],"alternative":[43],"architectures,":[44],"such":[45],"state":[47,120,131,212],"space":[48],"models":[49],"(SSMs),":[50],"linear":[51],"attention,":[52,135],"recurrent":[54],"neural":[55],"networks":[56],"(RNNs),":[57],"which":[58],"we":[59,90,156,168],"refer":[60],"to":[61,115,184,188,222,231],"post-transformers.":[63,123],"This":[64],"shift":[65],"presents":[66],"a":[67,71,83,193],"key":[68],"challenge:":[69],"building":[70],"system":[73],"that":[74,198],"efficiently":[75],"supports":[76],"both":[77,104],"transformer":[78,96],"post-transformer":[80,98],"LLMs":[81],"within":[82],"unified":[84],"framework.":[85],"To":[86],"address":[87],"this":[88],"challenge,":[89],"analyze":[91],"characteristics":[94],"of":[95,174,211],"LLMs.":[99],"Despite":[100],"algorithmic":[102],"differences,":[103],"fundamentally":[106],"limited":[107],"by":[108],"bandwidth":[110],"under":[111],"batched":[112],"inference":[113],"due":[114],"attention":[116,215],"in":[117,122],"transformers":[118],"updates":[121],"Further":[124],"analyses":[125],"suggest":[126],"two":[127,182],"additional":[128],"insights:":[129],"(1)":[130],"update":[132,213],"operations,":[133],"unlike":[134],"incur":[136],"high":[137],"hardware":[138],"cost,":[139],"making":[140],"per-bank":[141],"PIM":[142],"acceleration":[143],"inefficient,":[144],"(2)":[146],"different":[147],"low-precision":[148],"arithmetic":[149],"methods":[150],"offer":[151],"varying":[152],"accuracy-area":[153],"tradeoffs,":[154],"while":[155],"identify":[157],"Microsoft's":[158],"MX":[159],"Pareto-optimal":[162],"choice.":[163],"Building":[164],"on":[165],"these":[166],"insights,":[167],"design":[169],"Pimba":[170,228],"an":[172],"array":[173],"State-update":[175,194],"Processing":[176,195],"Units":[177],"(SPUs),":[178],"each":[179],"shared":[180],"between":[181],"banks":[183],"enable":[185],"interleaved":[186],"access":[187],"PIM.":[189],"Each":[190],"SPU":[191],"includes":[192],"Engine":[196],"(SPE)":[197],"comprises":[199],"element-wise":[200],"multipliers":[201],"adders":[203],"using":[204],"MX-based":[205],"quantized":[206],"arithmetic,":[207],"enabling":[208],"efficient":[209],"execution":[210],"operations.":[216],"Our":[217],"evaluation":[218],"shows":[219],"that,":[220],"compared":[221],"LLM-optimized":[223],"GPU":[224],"GPU+PIM":[226],"systems,":[227],"achieves":[229],"up":[230],"4.1x":[232],"2.1x":[234],"higher":[235],"token":[236],"generation":[237],"throughput,":[238],"respectively.":[239]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2025-10-10T00:00:00"}
