{"id":"https://openalex.org/W7133481372","doi":"https://doi.org/10.1109/hpca68181.2026.11408452","title":"AQPIM: Breaking the PIM Capacity Wall for LLMs with in-Memory Activation Quantization","display_name":"AQPIM: Breaking the PIM Capacity Wall for LLMs with in-Memory Activation Quantization","publication_year":2026,"publication_date":"2026-01-31","ids":{"openalex":"https://openalex.org/W7133481372","doi":"https://doi.org/10.1109/hpca68181.2026.11408452"},"language":"en","primary_location":{"id":"doi:10.1109/hpca68181.2026.11408452","is_oa":false,"landing_page_url":"https://doi.org/10.1109/hpca68181.2026.11408452","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2026 IEEE International Symposium on High Performance Computer Architecture (HPCA)","raw_type":"proceedings-article"},"type":"article","indexed_in":["arxiv","crossref"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/2604.18137","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5128111484","display_name":"Kosuke Matsushima","orcid":null},"institutions":[{"id":"https://openalex.org/I4210085920","display_name":"Shanghai Institute for Science of Science","ror":"https://ror.org/004srxn73","country_code":"CN","type":"facility","lineage":["https://openalex.org/I4210085920"]},{"id":"https://openalex.org/I4400009020","display_name":"Institute of Science Tokyo","ror":"https://ror.org/05dqf9946","country_code":null,"type":"education","lineage":["https://openalex.org/I4400009020"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Kosuke Matsushima","raw_affiliation_strings":["Institute of Science Tokyo"],"affiliations":[{"raw_affiliation_string":"Institute of Science Tokyo","institution_ids":["https://openalex.org/I4210085920","https://openalex.org/I4400009020"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5009538201","display_name":"Yasuyuki Okoshi","orcid":"https://orcid.org/0009-0005-8472-7841"},"institutions":[{"id":"https://openalex.org/I4210085920","display_name":"Shanghai Institute for Science of Science","ror":"https://ror.org/004srxn73","country_code":"CN","type":"facility","lineage":["https://openalex.org/I4210085920"]},{"id":"https://openalex.org/I4400009020","display_name":"Institute of Science Tokyo","ror":"https://ror.org/05dqf9946","country_code":null,"type":"education","lineage":["https://openalex.org/I4400009020"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yasuyuki Okoshi","raw_affiliation_strings":["Institute of Science Tokyo"],"affiliations":[{"raw_affiliation_string":"Institute of Science Tokyo","institution_ids":["https://openalex.org/I4210085920","https://openalex.org/I4400009020"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5089388923","display_name":"Masato Motomura","orcid":null},"institutions":[{"id":"https://openalex.org/I4210085920","display_name":"Shanghai Institute for Science of Science","ror":"https://ror.org/004srxn73","country_code":"CN","type":"facility","lineage":["https://openalex.org/I4210085920"]},{"id":"https://openalex.org/I4400009020","display_name":"Institute of Science Tokyo","ror":"https://ror.org/05dqf9946","country_code":null,"type":"education","lineage":["https://openalex.org/I4400009020"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Masato Motomura","raw_affiliation_strings":["Institute of Science Tokyo"],"affiliations":[{"raw_affiliation_string":"Institute of Science Tokyo","institution_ids":["https://openalex.org/I4210085920","https://openalex.org/I4400009020"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5058871698","display_name":"Daichi Fujiki","orcid":null},"institutions":[{"id":"https://openalex.org/I4210085920","display_name":"Shanghai Institute for Science of Science","ror":"https://ror.org/004srxn73","country_code":"CN","type":"facility","lineage":["https://openalex.org/I4210085920"]},{"id":"https://openalex.org/I4400009020","display_name":"Institute of Science Tokyo","ror":"https://ror.org/05dqf9946","country_code":null,"type":"education","lineage":["https://openalex.org/I4400009020"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Daichi Fujiki","raw_affiliation_strings":["Institute of Science Tokyo"],"affiliations":[{"raw_affiliation_string":"Institute of Science Tokyo","institution_ids":["https://openalex.org/I4210085920","https://openalex.org/I4400009020"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5128111484"],"corresponding_institution_ids":["https://openalex.org/I4210085920","https://openalex.org/I4400009020"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.45705516,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"17"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12808","display_name":"Ferroelectric and Negative Capacitance Devices","score":0.07400000095367432,"subfield":{"id":"https://openalex.org/subfields/2208","display_name":"Electrical and Electronic Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T12808","display_name":"Ferroelectric and Negative Capacitance Devices","score":0.07400000095367432,"subfield":{"id":"https://openalex.org/subfields/2208","display_name":"Electrical and Electronic Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10502","display_name":"Advanced Memory and Neural Computing","score":0.052400000393390656,"subfield":{"id":"https://openalex.org/subfields/2208","display_name":"Electrical and Electronic Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.03290000185370445,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/quantization","display_name":"Quantization (signal processing)","score":0.519599974155426},{"id":"https://openalex.org/keywords/control-theory","display_name":"Control theory (sociology)","score":0.26350000500679016},{"id":"https://openalex.org/keywords/population","display_name":"Population","score":0.2312999963760376},{"id":"https://openalex.org/keywords/context","display_name":"Context (archaeology)","score":0.21809999644756317}],"concepts":[{"id":"https://openalex.org/C28855332","wikidata":"https://www.wikidata.org/wiki/Q198099","display_name":"Quantization (signal processing)","level":2,"score":0.519599974155426},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.4684999883174896},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.3474000096321106},{"id":"https://openalex.org/C47446073","wikidata":"https://www.wikidata.org/wiki/Q5165890","display_name":"Control theory (sociology)","level":3,"score":0.26350000500679016},{"id":"https://openalex.org/C2908647359","wikidata":"https://www.wikidata.org/wiki/Q2625603","display_name":"Population","level":2,"score":0.2312999963760376},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.22939999401569366},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.22059999406337738},{"id":"https://openalex.org/C185592680","wikidata":"https://www.wikidata.org/wiki/Q2329","display_name":"Chemistry","level":0,"score":0.22010000050067902},{"id":"https://openalex.org/C2779343474","wikidata":"https://www.wikidata.org/wiki/Q3109175","display_name":"Context (archaeology)","level":2,"score":0.21809999644756317},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.19769999384880066}],"mesh":[],"locations_count":3,"locations":[{"id":"doi:10.1109/hpca68181.2026.11408452","is_oa":false,"landing_page_url":"https://doi.org/10.1109/hpca68181.2026.11408452","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2026 IEEE International Symposium on High Performance Computer Architecture (HPCA)","raw_type":"proceedings-article"},{"id":"pmh:oai:arXiv.org:2604.18137","is_oa":true,"landing_page_url":"https://arxiv.org/abs/2604.18137","pdf_url":"https://arxiv.org/pdf/2604.18137","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},{"id":"pmh:oai:t2r2.star.titech.ac.jp:50752075","is_oa":false,"landing_page_url":"http://t2r2.star.titech.ac.jp/cgi-bin/publicationinfo.cgi?q_publication_content_number=CTT100941377","pdf_url":null,"source":{"id":"https://openalex.org/S4377196385","display_name":"Tokyo Tech Research Repository (Tokyo Institute of Technology)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I114531698","host_organization_name":"Tokyo Institute of Technology","host_organization_lineage":["https://openalex.org/I114531698"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Journal Article"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:2604.18137","is_oa":true,"landing_page_url":"https://arxiv.org/abs/2604.18137","pdf_url":"https://arxiv.org/pdf/2604.18137","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},"sustainable_development_goals":[{"score":0.744996190071106,"id":"https://metadata.un.org/sdg/7","display_name":"Affordable and clean energy"}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":27,"referenced_works":["https://openalex.org/W2034861439","https://openalex.org/W2124509324","https://openalex.org/W2346205343","https://openalex.org/W2508602506","https://openalex.org/W2518281301","https://openalex.org/W2772923331","https://openalex.org/W2801000640","https://openalex.org/W2945146780","https://openalex.org/W2963963993","https://openalex.org/W3100985894","https://openalex.org/W3137147200","https://openalex.org/W3185702163","https://openalex.org/W3189166979","https://openalex.org/W3210580311","https://openalex.org/W4236868170","https://openalex.org/W4281660701","https://openalex.org/W4285548014","https://openalex.org/W4300171661","https://openalex.org/W4388757726","https://openalex.org/W4392427708","https://openalex.org/W4394998968","https://openalex.org/W4401211590","https://openalex.org/W4404782921","https://openalex.org/W4404918643","https://openalex.org/W4415797132","https://openalex.org/W4415797534","https://openalex.org/W7133224126"],"related_works":[],"abstract_inverted_index":{"Processing-in-Memory":[0],"(PIM)":[1],"architectures":[2],"offer":[3],"a":[4,117,210],"promising":[5],"solution":[6],"to":[7,88],"the":[8,18,74],"memory":[9,23,45,158],"bottlenecks":[10],"in":[11,35],"data-intensive":[12],"machine":[13],"learning,":[14],"yet":[15],"often":[16,67],"overlook":[17],"growing":[19],"challenge":[20],"of":[21,77,186,199],"activation":[22,86,104,120],"footprint.":[24],"Conventional":[25],"PIM":[26,61,212],"approaches":[27,62],"struggle":[28],"with":[29,54,103,203],"massive":[30],"KV":[31],"cache":[32],"sizes":[33],"generated":[34],"long-context":[36],"scenarios":[37],"by":[38,171],"Transformer-based":[39],"models,":[40],"frequently":[41],"exceeding":[42],"PIM's":[43,55,107,144],"limited":[44],"capacity,":[46],"while":[47],"techniques":[48],"like":[49],"sparse":[50],"attention":[51,164],"can":[52,190],"conflict":[53],"need":[56],"for":[57,72,84,130,163,192],"data":[58],"locality.":[59],"Existing":[60],"and":[63,91,106,148,160],"quantization":[64,87,98,121,138],"methods":[65],"are":[66],"insufficient":[68],"or":[69],"poorly":[70],"suited":[71],"leveraging":[73],"unique":[75],"characteristics":[76,105],"activations.":[78],"This":[79],"work":[80],"identifies":[81],"an":[82],"opportunity":[83],"PIMspecialized":[85],"enhance":[89],"bandwidth":[90,109,147],"compute":[92],"efficiency.":[93],"We":[94],"explore":[95],"clustering-based":[96],"vector":[97],"approaches,":[99],"which":[100],"align":[101],"well":[102],"internal":[108,146],"capabilities.":[110],"Building":[111],"on":[112,124,152],"this,":[113],"we":[114],"introduce":[115],"AQPIM,":[116],"novel":[118],"PIM-aware":[119],"framework":[122],"based":[123],"Product":[125],"Quantization":[126],"(PQ),":[127],"optimizing":[128],"it":[129],"modern":[131],"Large":[132],"Language":[133],"Models":[134],"(LLMs).":[135],"By":[136],"performing":[137],"directly":[139],"within":[140],"memory,":[141],"AQPIM":[142,166,179],"leverages":[143],"high":[145],"enables":[149],"direct":[150],"computation":[151],"compressed":[153],"data,":[154],"significantly":[155],"reducing":[156,185],"both":[157],"footprint":[159],"computational":[161],"overhead":[162],"computation.":[165],"addresses":[167],"PQ's":[168],"accuracy":[169],"challenges":[170],"introducing":[172],"several":[173],"algorithmic":[174],"optimizations.":[175],"Evaluations":[176],"demonstrate":[177],"that":[178,189],"achieves":[180],"significant":[181],"performance":[182],"improvements,":[183],"drastically":[184],"GPU-CPU":[187],"communication":[188],"account":[191],"<tex":[193,204],"xmlns:mml=\"http://www.w3.org/1998/Math/MathML\"":[194,205],"xmlns:xlink=\"http://www.w3.org/1999/xlink\">$90":[195],"\\sim":[196],"98.5":[197],"\\%$</tex>":[198],"decoding":[200],"latency,":[201],"together":[202],"xmlns:xlink=\"http://www.w3.org/1999/xlink\">$3.4":[206],"\\times$</tex>":[207],"speedup":[208],"over":[209],"SOTA":[211],"approach.":[213]},"counts_by_year":[],"updated_date":"2026-04-23T09:07:50.710637","created_date":"2026-03-05T00:00:00"}
