{"id":"https://openalex.org/W4415624822","doi":"https://doi.org/10.1109/tc.2025.3626449","title":"MI-LLM: Multiplier-Free LLM Inference on Commodity Processing-in-Memory Hardware","display_name":"MI-LLM: Multiplier-Free LLM Inference on Commodity Processing-in-Memory Hardware","publication_year":2025,"publication_date":"2025-10-28","ids":{"openalex":"https://openalex.org/W4415624822","doi":"https://doi.org/10.1109/tc.2025.3626449"},"language":null,"primary_location":{"id":"doi:10.1109/tc.2025.3626449","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tc.2025.3626449","pdf_url":null,"source":{"id":"https://openalex.org/S157670870","display_name":"IEEE Transactions on Computers","issn_l":"0018-9340","issn":["0018-9340","1557-9956","2326-3814"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Computers","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5114195832","display_name":"Puyun Hu","orcid":"https://orcid.org/0009-0001-2630-2918"},"institutions":[{"id":"https://openalex.org/I78988378","display_name":"Renmin University of China","ror":"https://ror.org/041pakw92","country_code":"CN","type":"education","lineage":["https://openalex.org/I78988378"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Puyun Hu","raw_affiliation_strings":["School of Information, Renmin University of China, Beijing, China","School of InformationRenmin University of China"],"raw_orcid":"https://orcid.org/0009-0001-2630-2918","affiliations":[{"raw_affiliation_string":"School of Information, Renmin University of China, Beijing, China","institution_ids":["https://openalex.org/I78988378"]},{"raw_affiliation_string":"School of InformationRenmin University of China","institution_ids":["https://openalex.org/I78988378"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101139637","display_name":"Minhui Xie","orcid":"https://orcid.org/0000-0001-6684-8336"},"institutions":[{"id":"https://openalex.org/I78988378","display_name":"Renmin University of China","ror":"https://ror.org/041pakw92","country_code":"CN","type":"education","lineage":["https://openalex.org/I78988378"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Minhui Xie","raw_affiliation_strings":["School of Information, Renmin University of China, Beijing, China","School of InformationRenmin University of China"],"raw_orcid":"https://orcid.org/0000-0001-6684-8336","affiliations":[{"raw_affiliation_string":"School of Information, Renmin University of China, Beijing, China","institution_ids":["https://openalex.org/I78988378"]},{"raw_affiliation_string":"School of InformationRenmin University of China","institution_ids":["https://openalex.org/I78988378"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5095777523","display_name":"Linjiang Li","orcid":null},"institutions":[{"id":"https://openalex.org/I78988378","display_name":"Renmin University of China","ror":"https://ror.org/041pakw92","country_code":"CN","type":"education","lineage":["https://openalex.org/I78988378"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Linjiang Li","raw_affiliation_strings":["School of Information, Renmin University of China, Beijing, China","School of InformationRenmin University of China"],"raw_orcid":"https://orcid.org/0009-0005-0941-2639","affiliations":[{"raw_affiliation_string":"School of Information, Renmin University of China, Beijing, China","institution_ids":["https://openalex.org/I78988378"]},{"raw_affiliation_string":"School of InformationRenmin University of China","institution_ids":["https://openalex.org/I78988378"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Kuiyaohui Zhang","orcid":"https://orcid.org/0009-0009-1945-3521"},"institutions":[{"id":"https://openalex.org/I78988378","display_name":"Renmin University of China","ror":"https://ror.org/041pakw92","country_code":"CN","type":"education","lineage":["https://openalex.org/I78988378"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Kuiyaohui Zhang","raw_affiliation_strings":["School of Information, Renmin University of China, Beijing, China","School of InformationRenmin University of China"],"raw_orcid":"https://orcid.org/0009-0009-1945-3521","affiliations":[{"raw_affiliation_string":"School of Information, Renmin University of China, Beijing, China","institution_ids":["https://openalex.org/I78988378"]},{"raw_affiliation_string":"School of InformationRenmin University of China","institution_ids":["https://openalex.org/I78988378"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5095777522","display_name":"Erge Xiang","orcid":null},"institutions":[{"id":"https://openalex.org/I78988378","display_name":"Renmin University of China","ror":"https://ror.org/041pakw92","country_code":"CN","type":"education","lineage":["https://openalex.org/I78988378"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Erge Xiang","raw_affiliation_strings":["School of Information, Renmin University of China, Beijing, China","School of InformationRenmin University of China"],"raw_orcid":"https://orcid.org/0009-0005-2963-2479","affiliations":[{"raw_affiliation_string":"School of Information, Renmin University of China, Beijing, China","institution_ids":["https://openalex.org/I78988378"]},{"raw_affiliation_string":"School of InformationRenmin University of China","institution_ids":["https://openalex.org/I78988378"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100378697","display_name":"Jing Wang","orcid":"https://orcid.org/0000-0003-3653-7013"},"institutions":[{"id":"https://openalex.org/I78988378","display_name":"Renmin University of China","ror":"https://ror.org/041pakw92","country_code":"CN","type":"education","lineage":["https://openalex.org/I78988378"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jing Wang","raw_affiliation_strings":["School of Information, Renmin University of China, Beijing, China","School of InformationRenmin University of China"],"raw_orcid":"https://orcid.org/0000-0003-3653-7013","affiliations":[{"raw_affiliation_string":"School of Information, Renmin University of China, Beijing, China","institution_ids":["https://openalex.org/I78988378"]},{"raw_affiliation_string":"School of InformationRenmin University of China","institution_ids":["https://openalex.org/I78988378"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5072186367","display_name":"Size Zheng","orcid":"https://orcid.org/0000-0002-9471-1780"},"institutions":[{"id":"https://openalex.org/I4210121368","display_name":"Machine Science","ror":"https://ror.org/02hrr9v50","country_code":"US","type":"nonprofit","lineage":["https://openalex.org/I4210121368"]},{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN","US"],"is_corresponding":false,"raw_author_name":"Size Zheng","raw_affiliation_strings":["Tsinghua University, Beijing, China","machine learning system researcher scientist at ByteDance"],"raw_orcid":"https://orcid.org/0000-0002-9471-1780","affiliations":[{"raw_affiliation_string":"Tsinghua University, Beijing, China","institution_ids":["https://openalex.org/I99065089"]},{"raw_affiliation_string":"machine learning system researcher scientist at ByteDance","institution_ids":["https://openalex.org/I4210121368"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100320957","display_name":"Xiao Zhang","orcid":"https://orcid.org/0009-0001-1857-1368"},"institutions":[{"id":"https://openalex.org/I78988378","display_name":"Renmin University of China","ror":"https://ror.org/041pakw92","country_code":"CN","type":"education","lineage":["https://openalex.org/I78988378"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xiao Zhang","raw_affiliation_strings":["School of Information, Renmin University of China, Beijing, China","School of InformationRenmin University of China"],"raw_orcid":"https://orcid.org/0009-0001-1857-1368","affiliations":[{"raw_affiliation_string":"School of Information, Renmin University of China, Beijing, China","institution_ids":["https://openalex.org/I78988378"]},{"raw_affiliation_string":"School of InformationRenmin University of China","institution_ids":["https://openalex.org/I78988378"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5101440226","display_name":"Yunpeng Chai","orcid":"https://orcid.org/0000-0002-3286-9259"},"institutions":[{"id":"https://openalex.org/I78988378","display_name":"Renmin University of China","ror":"https://ror.org/041pakw92","country_code":"CN","type":"education","lineage":["https://openalex.org/I78988378"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yunpeng Chai","raw_affiliation_strings":["School of Information, Renmin University of China, Beijing, China","School of InformationRenmin University of China"],"raw_orcid":"https://orcid.org/0000-0002-3286-9259","affiliations":[{"raw_affiliation_string":"School of Information, Renmin University of China, Beijing, China","institution_ids":["https://openalex.org/I78988378"]},{"raw_affiliation_string":"School of InformationRenmin University of China","institution_ids":["https://openalex.org/I78988378"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":9,"corresponding_author_ids":["https://openalex.org/A5114195832"],"corresponding_institution_ids":["https://openalex.org/I78988378"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.15837408,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":"75","issue":"2","first_page":"503","last_page":"515"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.2363000065088272,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.2363000065088272,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T14347","display_name":"Big Data and Digital Economy","score":0.12999999523162842,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.1257999986410141,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/lookup-table","display_name":"Lookup table","score":0.6549000144004822},{"id":"https://openalex.org/keywords/memory-bandwidth","display_name":"Memory bandwidth","score":0.6421999931335449},{"id":"https://openalex.org/keywords/inference","display_name":"Inference","score":0.6108999848365784},{"id":"https://openalex.org/keywords/cache","display_name":"Cache","score":0.5943999886512756},{"id":"https://openalex.org/keywords/key","display_name":"Key (lock)","score":0.5185999870300293},{"id":"https://openalex.org/keywords/table","display_name":"Table (database)","score":0.4268999993801117},{"id":"https://openalex.org/keywords/high-memory","display_name":"High memory","score":0.40700000524520874},{"id":"https://openalex.org/keywords/bandwidth","display_name":"Bandwidth (computing)","score":0.4027999937534332}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8745999932289124},{"id":"https://openalex.org/C134835016","wikidata":"https://www.wikidata.org/wiki/Q690265","display_name":"Lookup table","level":2,"score":0.6549000144004822},{"id":"https://openalex.org/C188045654","wikidata":"https://www.wikidata.org/wiki/Q17148339","display_name":"Memory bandwidth","level":2,"score":0.6421999931335449},{"id":"https://openalex.org/C2776214188","wikidata":"https://www.wikidata.org/wiki/Q408386","display_name":"Inference","level":2,"score":0.6108999848365784},{"id":"https://openalex.org/C115537543","wikidata":"https://www.wikidata.org/wiki/Q165596","display_name":"Cache","level":2,"score":0.5943999886512756},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.5489000082015991},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.5185999870300293},{"id":"https://openalex.org/C45235069","wikidata":"https://www.wikidata.org/wiki/Q278425","display_name":"Table (database)","level":2,"score":0.4268999993801117},{"id":"https://openalex.org/C113775141","wikidata":"https://www.wikidata.org/wiki/Q428691","display_name":"Computer engineering","level":1,"score":0.4239000082015991},{"id":"https://openalex.org/C118524514","wikidata":"https://www.wikidata.org/wiki/Q173212","display_name":"Computer architecture","level":1,"score":0.40790000557899475},{"id":"https://openalex.org/C2781357197","wikidata":"https://www.wikidata.org/wiki/Q5757597","display_name":"High memory","level":2,"score":0.40700000524520874},{"id":"https://openalex.org/C2776257435","wikidata":"https://www.wikidata.org/wiki/Q1576430","display_name":"Bandwidth (computing)","level":2,"score":0.4027999937534332},{"id":"https://openalex.org/C98045186","wikidata":"https://www.wikidata.org/wiki/Q205663","display_name":"Process (computing)","level":2,"score":0.398499995470047},{"id":"https://openalex.org/C176649486","wikidata":"https://www.wikidata.org/wiki/Q2308807","display_name":"Memory management","level":3,"score":0.3513000011444092},{"id":"https://openalex.org/C2779602883","wikidata":"https://www.wikidata.org/wiki/Q15544750","display_name":"Memory architecture","level":2,"score":0.3375999927520752},{"id":"https://openalex.org/C157764524","wikidata":"https://www.wikidata.org/wiki/Q1383412","display_name":"Throughput","level":3,"score":0.3296000063419342},{"id":"https://openalex.org/C77618280","wikidata":"https://www.wikidata.org/wiki/Q1155772","display_name":"Scheme (mathematics)","level":2,"score":0.313400000333786},{"id":"https://openalex.org/C2778915421","wikidata":"https://www.wikidata.org/wiki/Q3643177","display_name":"Performance improvement","level":2,"score":0.3068000078201294},{"id":"https://openalex.org/C149635348","wikidata":"https://www.wikidata.org/wiki/Q193040","display_name":"Embedded system","level":1,"score":0.3034999966621399},{"id":"https://openalex.org/C3720319","wikidata":"https://www.wikidata.org/wiki/Q5015937","display_name":"Cache-only memory architecture","level":5,"score":0.28679999709129333},{"id":"https://openalex.org/C189783530","wikidata":"https://www.wikidata.org/wiki/Q352090","display_name":"CPU cache","level":3,"score":0.28349998593330383},{"id":"https://openalex.org/C123657996","wikidata":"https://www.wikidata.org/wiki/Q12271","display_name":"Architecture","level":2,"score":0.27720001339912415},{"id":"https://openalex.org/C68339613","wikidata":"https://www.wikidata.org/wiki/Q1549489","display_name":"Speedup","level":2,"score":0.260699987411499},{"id":"https://openalex.org/C9390403","wikidata":"https://www.wikidata.org/wiki/Q3966","display_name":"Computer hardware","level":1,"score":0.259799987077713}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/tc.2025.3626449","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tc.2025.3626449","pdf_url":null,"source":{"id":"https://openalex.org/S157670870","display_name":"IEEE Transactions on Computers","issn_l":"0018-9340","issn":["0018-9340","1557-9956","2326-3814"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Computers","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":19,"referenced_works":["https://openalex.org/W2759259332","https://openalex.org/W3042598257","https://openalex.org/W3134274954","https://openalex.org/W3157531038","https://openalex.org/W3202917172","https://openalex.org/W4214734582","https://openalex.org/W4285121610","https://openalex.org/W4297097348","https://openalex.org/W4301607055","https://openalex.org/W4304140739","https://openalex.org/W4321448334","https://openalex.org/W4361017256","https://openalex.org/W4380624072","https://openalex.org/W4381894723","https://openalex.org/W4386764197","https://openalex.org/W4387212331","https://openalex.org/W4387321091","https://openalex.org/W4392427708","https://openalex.org/W4394999021"],"related_works":[],"abstract_inverted_index":{"Large":[0],"language":[1,11],"models":[2],"(LLMs)":[3],"are":[4],"prominent":[5],"for":[6,19,78],"their":[7],"superior":[8],"ability":[9],"in":[10,250,267,274],"understanding":[12],"and":[13,38,74,113,144,215,246],"generation.":[14],"However,":[15,83],"a":[16,175,228,242,263],"notorious":[17],"problem":[18],"LLM":[20,80,86,129],"inference":[21,81,87,130],"is":[22,137],"low":[23],"computational":[24],"utilization":[25],"caused":[26,106,167,192],"by":[27,107,168,193],"the":[28,60,64,108,154,163,169,182,204,211,289],"memory":[29,36,57,111,200],"bottleneck,":[30],"since":[31],"it":[32],"typically":[33],"requires":[34,279],"large":[35],"capacity":[37],"high":[39],"bandwidth":[40],"to":[41,96,138,180,219,232,257],"process":[42],"neural":[43],"weights.":[44],"By":[45],"integrating":[46],"processing":[47],"cores":[48],"into":[49],"memory,":[50],"Processing-In-Memory":[51],"(PIM)":[52],"architecture":[53],"excels":[54],"at":[55],"alleviating":[56],"bottleneck;":[58],"with":[59,148,188,210],"recent":[61],"release":[62],"of":[63,156,171,206,213],"first":[65],"commodity":[66],"near-bank":[67],"PIM":[68,71,198],"hardware":[69],"(NBP),":[70],"becomes":[72],"off-the-shelf":[73],"shows":[75],"great":[76],"potential":[77],"accelerating":[79],"practically.":[82],"simply":[84],"shoehorning":[85],"on":[88,131,150],"NBP":[89,132],"can":[90],"not":[91],"achieve":[92],"satisfactory":[93],"performance":[94],"due":[95],"its":[97],"inherent":[98],"limitations:":[99],"weak":[100,157],"compute":[101,158],"performance,":[102],"frequent":[103,189],"cache":[104,190],"misses":[105,191],"limited":[109],"working":[110,199],"capacity,":[112,201],"poor":[114],"inter-PIM-core":[115,234],"communication":[116],"bandwidth.":[117],"To":[118,161,186],"address":[119],"these":[120],"limitations,":[121],"we":[122],"propose":[123],"MI-LLM,":[124],"an":[125,247],"efficient":[126],"system":[127],"deploying":[128],"hardware.":[133],"Its":[134],"key":[135],"idea":[136],"build":[139],"NBP-aware":[140],"Lookup":[141],"Tables":[142],"(LUTs)":[143],"completely":[145],"replace":[146],"multiplications":[147],"lookups":[149],"LUTs,":[151],"thereby":[152],"mitigating":[153],"limitation":[155],"performance.":[159],"1)":[160],"reduce":[162],"model":[164,183,229],"accuracy":[165,271],"drop":[166],"use":[170],"LUT,":[172],"MI-LLM":[173,202,225,240,260,278],"tailors":[174],"learning-based":[176],"LUT":[177,194,221],"construction":[178],"method":[179],"maintain":[181],"accuracy.":[184],"2)":[185],"cope":[187],"sizes":[195],"far":[196],"exceeding":[197],"introduces":[203],"design":[205],"PIM-aware":[207],"linear":[208],"kernel,":[209],"optimization":[212],"intra-row":[214],"inter-row":[216],"reordering":[217],"enabled,":[218],"enhance":[220],"lookup":[222],"locality.":[223],"3)":[224],"further":[226],"proposes":[227],"partitioning":[230],"scheme":[231],"minimize":[233],"communication.":[235],"Kernel-level":[236],"benchmarks":[237],"reveal":[238],"that":[239],"achieves":[241],"9%":[243],"throughput":[244],"improvement":[245],"11%":[248],"increase":[249,266],"energy":[251],"efficiency":[252],"over":[253],"GPU":[254,290],"implementations.":[255],"Compared":[256],"FP8":[258],"quantization,":[259],"incurs":[261],"only":[262],"0.24":[264],"times":[265],"perplexity,":[268],"demonstrating":[269],"minimal":[270],"degradation.":[272],"Moreover,":[273],"our":[275],"end-to-end":[276],"evaluation,":[277],"80%":[280],"fewer":[281],"ALU":[282],"operation":[283],"ticks":[284],"per":[285],"output":[286],"token":[287],"than":[288],"baseline.":[291]},"counts_by_year":[],"updated_date":"2026-01-16T23:16:36.188383","created_date":"2025-10-28T00:00:00"}
