{"id":"https://openalex.org/W4416429513","doi":"https://doi.org/10.1109/iccad66269.2025.11240991","title":"PLAIN: Leveraging High Internal Bandwidth in PIM for Accelerating Large Language Model Inference via Mixed-Precision Quantization","display_name":"PLAIN: Leveraging High Internal Bandwidth in PIM for Accelerating Large Language Model Inference via Mixed-Precision Quantization","publication_year":2025,"publication_date":"2025-10-26","ids":{"openalex":"https://openalex.org/W4416429513","doi":"https://doi.org/10.1109/iccad66269.2025.11240991"},"language":null,"primary_location":{"id":"doi:10.1109/iccad66269.2025.11240991","is_oa":false,"landing_page_url":"https://doi.org/10.1109/iccad66269.2025.11240991","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 IEEE/ACM International Conference On Computer Aided Design (ICCAD)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5101443061","display_name":"Yiwei Hu","orcid":"https://orcid.org/0000-0002-6713-4148"},"institutions":[{"id":"https://openalex.org/I183067930","display_name":"Shanghai Jiao Tong University","ror":"https://ror.org/0220qvk04","country_code":"CN","type":"education","lineage":["https://openalex.org/I183067930"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Yiwei Hu","raw_affiliation_strings":["Shanghai Jiao Tong University,School of Computer Science"],"affiliations":[{"raw_affiliation_string":"Shanghai Jiao Tong University,School of Computer Science","institution_ids":["https://openalex.org/I183067930"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5017670541","display_name":"Fangxin Liu","orcid":"https://orcid.org/0000-0002-8769-293X"},"institutions":[{"id":"https://openalex.org/I183067930","display_name":"Shanghai Jiao Tong University","ror":"https://ror.org/0220qvk04","country_code":"CN","type":"education","lineage":["https://openalex.org/I183067930"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Fangxin Liu","raw_affiliation_strings":["Shanghai Jiao Tong University,School of Computer Science"],"affiliations":[{"raw_affiliation_string":"Shanghai Jiao Tong University,School of Computer Science","institution_ids":["https://openalex.org/I183067930"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5107269495","display_name":"Zongwu Wang","orcid":"https://orcid.org/0009-0003-2157-4927"},"institutions":[{"id":"https://openalex.org/I183067930","display_name":"Shanghai Jiao Tong University","ror":"https://ror.org/0220qvk04","country_code":"CN","type":"education","lineage":["https://openalex.org/I183067930"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Zongwu Wang","raw_affiliation_strings":["Shanghai Jiao Tong University,School of Computer Science"],"affiliations":[{"raw_affiliation_string":"Shanghai Jiao Tong University,School of Computer Science","institution_ids":["https://openalex.org/I183067930"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101870121","display_name":"Yilong Zhao","orcid":"https://orcid.org/0000-0002-4888-9027"},"institutions":[{"id":"https://openalex.org/I183067930","display_name":"Shanghai Jiao Tong University","ror":"https://ror.org/0220qvk04","country_code":"CN","type":"education","lineage":["https://openalex.org/I183067930"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yilong Zhao","raw_affiliation_strings":["Shanghai Jiao Tong University,School of Computer Science"],"affiliations":[{"raw_affiliation_string":"Shanghai Jiao Tong University,School of Computer Science","institution_ids":["https://openalex.org/I183067930"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5065875464","display_name":"Tao Yang","orcid":"https://orcid.org/0000-0002-2276-4023"},"institutions":[{"id":"https://openalex.org/I4210160618","display_name":"Huawei Technologies (United Kingdom)","ror":"https://ror.org/056gzgs71","country_code":"GB","type":"company","lineage":["https://openalex.org/I2250955327","https://openalex.org/I4210160618"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Tao Yang","raw_affiliation_strings":["Huawei Technologies Co., Ltd"],"affiliations":[{"raw_affiliation_string":"Huawei Technologies Co., Ltd","institution_ids":["https://openalex.org/I4210160618"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5071012979","display_name":"Li Jiang","orcid":"https://orcid.org/0000-0003-2724-0605"},"institutions":[{"id":"https://openalex.org/I183067930","display_name":"Shanghai Jiao Tong University","ror":"https://ror.org/0220qvk04","country_code":"CN","type":"education","lineage":["https://openalex.org/I183067930"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Li Jiang","raw_affiliation_strings":["Shanghai Jiao Tong University,School of Computer Science"],"affiliations":[{"raw_affiliation_string":"Shanghai Jiao Tong University,School of Computer Science","institution_ids":["https://openalex.org/I183067930"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5049487451","display_name":"Haibing Guan","orcid":"https://orcid.org/0000-0002-4714-7400"},"institutions":[{"id":"https://openalex.org/I183067930","display_name":"Shanghai Jiao Tong University","ror":"https://ror.org/0220qvk04","country_code":"CN","type":"education","lineage":["https://openalex.org/I183067930"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Haibing Guan","raw_affiliation_strings":["Shanghai Jiao Tong University,School of Computer Science"],"affiliations":[{"raw_affiliation_string":"Shanghai Jiao Tong University,School of Computer Science","institution_ids":["https://openalex.org/I183067930"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":7,"corresponding_author_ids":["https://openalex.org/A5101443061"],"corresponding_institution_ids":["https://openalex.org/I183067930"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.38086381,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"9"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.6043000221252441,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.6043000221252441,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12808","display_name":"Ferroelectric and Negative Capacitance Devices","score":0.08720000088214874,"subfield":{"id":"https://openalex.org/subfields/2208","display_name":"Electrical and Electronic Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.0723000019788742,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/dataflow","display_name":"Dataflow","score":0.8203999996185303},{"id":"https://openalex.org/keywords/memory-bandwidth","display_name":"Memory bandwidth","score":0.5842999815940857},{"id":"https://openalex.org/keywords/inference","display_name":"Inference","score":0.5702999830245972},{"id":"https://openalex.org/keywords/quantization","display_name":"Quantization (signal processing)","score":0.5435000061988831},{"id":"https://openalex.org/keywords/locality","display_name":"Locality","score":0.5277000069618225},{"id":"https://openalex.org/keywords/language-model","display_name":"Language model","score":0.5241000056266785},{"id":"https://openalex.org/keywords/key","display_name":"Key (lock)","score":0.49380001425743103},{"id":"https://openalex.org/keywords/memory-model","display_name":"Memory model","score":0.48829999566078186}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8799999952316284},{"id":"https://openalex.org/C96324660","wikidata":"https://www.wikidata.org/wiki/Q205446","display_name":"Dataflow","level":2,"score":0.8203999996185303},{"id":"https://openalex.org/C188045654","wikidata":"https://www.wikidata.org/wiki/Q17148339","display_name":"Memory bandwidth","level":2,"score":0.5842999815940857},{"id":"https://openalex.org/C2776214188","wikidata":"https://www.wikidata.org/wiki/Q408386","display_name":"Inference","level":2,"score":0.5702999830245972},{"id":"https://openalex.org/C28855332","wikidata":"https://www.wikidata.org/wiki/Q198099","display_name":"Quantization (signal processing)","level":2,"score":0.5435000061988831},{"id":"https://openalex.org/C2779808786","wikidata":"https://www.wikidata.org/wiki/Q6664603","display_name":"Locality","level":2,"score":0.5277000069618225},{"id":"https://openalex.org/C137293760","wikidata":"https://www.wikidata.org/wiki/Q3621696","display_name":"Language model","level":2,"score":0.5241000056266785},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.49380001425743103},{"id":"https://openalex.org/C12186640","wikidata":"https://www.wikidata.org/wiki/Q6815743","display_name":"Memory model","level":3,"score":0.48829999566078186},{"id":"https://openalex.org/C45374587","wikidata":"https://www.wikidata.org/wiki/Q12525525","display_name":"Computation","level":2,"score":0.4851999878883362},{"id":"https://openalex.org/C2776257435","wikidata":"https://www.wikidata.org/wiki/Q1576430","display_name":"Bandwidth (computing)","level":2,"score":0.38929998874664307},{"id":"https://openalex.org/C113775141","wikidata":"https://www.wikidata.org/wiki/Q428691","display_name":"Computer engineering","level":1,"score":0.3734999895095825},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.36629998683929443},{"id":"https://openalex.org/C118524514","wikidata":"https://www.wikidata.org/wiki/Q173212","display_name":"Computer architecture","level":1,"score":0.35839998722076416},{"id":"https://openalex.org/C120314980","wikidata":"https://www.wikidata.org/wiki/Q180634","display_name":"Distributed computing","level":1,"score":0.3402000069618225},{"id":"https://openalex.org/C2776834041","wikidata":"https://www.wikidata.org/wiki/Q25346349","display_name":"Execution model","level":2,"score":0.3269999921321869},{"id":"https://openalex.org/C206729178","wikidata":"https://www.wikidata.org/wiki/Q2271896","display_name":"Scheduling (production processes)","level":2,"score":0.31220000982284546},{"id":"https://openalex.org/C184596265","wikidata":"https://www.wikidata.org/wiki/Q2651576","display_name":"Model of computation","level":3,"score":0.3093999922275543},{"id":"https://openalex.org/C95623464","wikidata":"https://www.wikidata.org/wiki/Q1096149","display_name":"Classifier (UML)","level":2,"score":0.30889999866485596},{"id":"https://openalex.org/C2781357197","wikidata":"https://www.wikidata.org/wiki/Q5757597","display_name":"High memory","level":2,"score":0.29350000619888306},{"id":"https://openalex.org/C126831891","wikidata":"https://www.wikidata.org/wiki/Q221673","display_name":"Host (biology)","level":2,"score":0.29179999232292175},{"id":"https://openalex.org/C108583219","wikidata":"https://www.wikidata.org/wiki/Q197536","display_name":"Deep learning","level":2,"score":0.28189998865127563},{"id":"https://openalex.org/C27602214","wikidata":"https://www.wikidata.org/wiki/Q1868547","display_name":"Locality of reference","level":3,"score":0.25529998540878296}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/iccad66269.2025.11240991","is_oa":false,"landing_page_url":"https://doi.org/10.1109/iccad66269.2025.11240991","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 IEEE/ACM International Conference On Computer Aided Design (ICCAD)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[{"id":"https://openalex.org/F4320309612","display_name":"Natural Science Foundation of Shanghai","ror":null},{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"},{"id":"https://openalex.org/F4320335777","display_name":"National Key Research and Development Program of China","ror":null}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":24,"referenced_works":["https://openalex.org/W2725159389","https://openalex.org/W2752340955","https://openalex.org/W2979719709","https://openalex.org/W2982008795","https://openalex.org/W2982041622","https://openalex.org/W2982479999","https://openalex.org/W3043023836","https://openalex.org/W3100710793","https://openalex.org/W3134274954","https://openalex.org/W3204021316","https://openalex.org/W3209151516","https://openalex.org/W3210580311","https://openalex.org/W4254260720","https://openalex.org/W4292261931","https://openalex.org/W4297097348","https://openalex.org/W4313546932","https://openalex.org/W4380874786","https://openalex.org/W4393406920","https://openalex.org/W4393407040","https://openalex.org/W4394998968","https://openalex.org/W4394999021","https://openalex.org/W4401211602","https://openalex.org/W4404134117","https://openalex.org/W4411486435"],"related_works":[],"abstract_inverted_index":{"DRAM-based":[0],"processing-in-memory":[1],"(DRAM-PIM)":[2],"has":[3],"gained":[4],"commercial":[5],"prominence":[6],"in":[7,31,53,80],"recent":[8],"years.":[9],"However,":[10],"its":[11],"integration":[12],"for":[13,18,91,167,178,198],"deep":[14],"learning":[15],"acceleration,":[16],"particularly":[17],"large":[19],"language":[20],"models":[21],"(LLMs),":[22],"poses":[23],"inherent":[24],"challenges.":[25],"Existing":[26],"DRAM-PIM":[27],"systems":[28,60,177],"are":[29],"limited":[30],"computational":[32],"capabilities,":[33],"primarily":[34],"supporting":[35],"element-wise":[36],"and":[37,73,101,114,144,157,170,195,219,227],"general":[38],"matrix-vector":[39],"multiplication":[40],"(GEMV)":[41],"operations,":[42],"which":[43],"contribute":[44],"only":[45],"a":[46,57,86,127,186,225],"small":[47],"portion":[48],"of":[49,77,99,105,136,175],"the":[50,75,96,102,133,160,171,211,215,220],"execution":[51,202],"time":[52],"LLM":[54],"workloads.":[55],"As":[56],"result,":[58],"current":[59],"still":[61],"require":[62],"powerful":[63,172],"host":[64],"processors":[65],"to":[66,107,147],"manage":[67],"compute-heavy":[68],"operations.To":[69],"address":[70],"these":[71],"challenges":[72],"expand":[74],"applicability":[76],"commodity":[78],"DRAM-PIMs":[79],"accelerating":[81],"LLMs,":[82],"we":[83,125],"introduce":[84],"PLAIN,":[85],"novel":[87,128],"software/hardware":[88],"co-design":[89],"framework":[90,118],"PIM-enabled":[92],"systems.":[93],"PLAIN":[94,152,184,209],"leverages":[95],"distribution":[97],"locality":[98],"parameters":[100,137,218],"unique":[103],"characteristics":[104,146],"PIM":[106,222],"achieve":[108],"optimal":[109,134],"trade-offs":[110],"between":[111],"inference":[112],"cost":[113],"model":[115,233],"quality.":[116],"Our":[117],"includes":[119],"three":[120],"key":[121],"innovations:":[122],"1)":[123],"firstly,":[124],"propose":[126],"quantization":[129],"algorithm":[130],"that":[131,190],"determines":[132],"precision":[135],"within":[138,165],"each":[139],"layer,":[140],"considering":[141],"both":[142,155],"algorithmic":[143],"hardware":[145,149,205],"optimize":[148],"mapping;":[150],"2)":[151],"strategically":[153],"utilizes":[154],"GPUs":[156],"PIMs,":[158],"leveraging":[159],"high":[161],"internal":[162],"memory":[163,196,217],"bandwidth":[164],"HBM":[166],"attention":[168],"layers":[169],"compute":[173],"capability":[174],"conventional":[176,212],"fully":[179],"connected":[180],"(FC)":[181],"layers;":[182],"3)":[183],"integrates":[185],"workload-aware":[187],"dataflow":[188],"scheduler":[189],"efficiently":[191],"arranges":[192],"complex":[193],"computations":[194],"access":[197],"mixed-precision":[199],"tensors,":[200],"optimizing":[201],"across":[203],"different":[204],"components.":[206],"Experiments":[207],"show":[208],"outperforms":[210],"GPU":[213],"with":[214,231],"same":[216],"state-of-the-art":[221],"accelerator,":[223],"achieving":[224],"5.03\u00d7":[226],"1.69\u00d7":[228],"performance":[229],"boost,":[230],"negligible":[232],"quality":[234],"loss.":[235]},"counts_by_year":[],"updated_date":"2026-04-09T08:11:56.329763","created_date":"2025-11-20T00:00:00"}
