{"id":"https://openalex.org/W7085064285","doi":"https://doi.org/10.1109/lca.2025.3618104","title":"Low-Latency PIM Accelerator for Edge LLM Inference","display_name":"Low-Latency PIM Accelerator for Edge LLM Inference","publication_year":2025,"publication_date":"2025-07-01","ids":{"openalex":"https://openalex.org/W7085064285","doi":"https://doi.org/10.1109/lca.2025.3618104"},"language":"en","primary_location":{"id":"doi:10.1109/lca.2025.3618104","is_oa":false,"landing_page_url":"https://doi.org/10.1109/lca.2025.3618104","pdf_url":null,"source":{"id":"https://openalex.org/S17643076","display_name":"IEEE Computer Architecture Letters","issn_l":"1556-6056","issn":["1556-6056","1556-6064","2473-2575"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Computer Architecture Letters","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":null,"display_name":"Xinyu Wang","orcid":"https://orcid.org/0009-0007-2775-2068"},"institutions":[{"id":"https://openalex.org/I19820366","display_name":"Chinese Academy of Sciences","ror":"https://ror.org/034t30j35","country_code":"CN","type":"government","lineage":["https://openalex.org/I19820366"]},{"id":"https://openalex.org/I4210090176","display_name":"Institute of Computing Technology","ror":"https://ror.org/0090r4d87","country_code":"CN","type":"facility","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210090176"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Xinyu Wang","raw_affiliation_strings":["Institute of Computing Technology, Chinese Academy of Sciences, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Institute of Computing Technology, Chinese Academy of Sciences, Beijing, China","institution_ids":["https://openalex.org/I4210090176","https://openalex.org/I19820366"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Xiaotian Sun","orcid":"https://orcid.org/0009-0003-8068-7024"},"institutions":[{"id":"https://openalex.org/I19820366","display_name":"Chinese Academy of Sciences","ror":"https://ror.org/034t30j35","country_code":"CN","type":"government","lineage":["https://openalex.org/I19820366"]},{"id":"https://openalex.org/I4210090176","display_name":"Institute of Computing Technology","ror":"https://ror.org/0090r4d87","country_code":"CN","type":"facility","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210090176"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xiaotian Sun","raw_affiliation_strings":["Institute of Computing Technology, Chinese Academy of Sciences, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Institute of Computing Technology, Chinese Academy of Sciences, Beijing, China","institution_ids":["https://openalex.org/I4210090176","https://openalex.org/I19820366"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Wanqian Li","orcid":"https://orcid.org/0000-0003-3741-535X"},"institutions":[{"id":"https://openalex.org/I19820366","display_name":"Chinese Academy of Sciences","ror":"https://ror.org/034t30j35","country_code":"CN","type":"government","lineage":["https://openalex.org/I19820366"]},{"id":"https://openalex.org/I4210090176","display_name":"Institute of Computing Technology","ror":"https://ror.org/0090r4d87","country_code":"CN","type":"facility","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210090176"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Wanqian Li","raw_affiliation_strings":["Institute of Computing Technology, Chinese Academy of Sciences, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Institute of Computing Technology, Chinese Academy of Sciences, Beijing, China","institution_ids":["https://openalex.org/I4210090176","https://openalex.org/I19820366"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Feng Min","orcid":"https://orcid.org/0000-0002-0107-0196"},"institutions":[{"id":"https://openalex.org/I19820366","display_name":"Chinese Academy of Sciences","ror":"https://ror.org/034t30j35","country_code":"CN","type":"government","lineage":["https://openalex.org/I19820366"]},{"id":"https://openalex.org/I4210090176","display_name":"Institute of Computing Technology","ror":"https://ror.org/0090r4d87","country_code":"CN","type":"facility","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210090176"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Feng Min","raw_affiliation_strings":["State Key Lab of Processors, Institute of Computing Technology, Chinese Academy of Sciences, Beijing, China"],"affiliations":[{"raw_affiliation_string":"State Key Lab of Processors, Institute of Computing Technology, Chinese Academy of Sciences, Beijing, China","institution_ids":["https://openalex.org/I4210090176","https://openalex.org/I19820366"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Xiaoyu Zhang","orcid":"https://orcid.org/0000-0003-2165-7151"},"institutions":[{"id":"https://openalex.org/I19820366","display_name":"Chinese Academy of Sciences","ror":"https://ror.org/034t30j35","country_code":"CN","type":"government","lineage":["https://openalex.org/I19820366"]},{"id":"https://openalex.org/I4210090176","display_name":"Institute of Computing Technology","ror":"https://ror.org/0090r4d87","country_code":"CN","type":"facility","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210090176"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xiaoyu Zhang","raw_affiliation_strings":["State Key Lab of Processors, Institute of Computing Technology, Chinese Academy of Sciences, Beijing, China"],"affiliations":[{"raw_affiliation_string":"State Key Lab of Processors, Institute of Computing Technology, Chinese Academy of Sciences, Beijing, China","institution_ids":["https://openalex.org/I4210090176","https://openalex.org/I19820366"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Xinjiang Zhang","orcid":null},"institutions":[{"id":"https://openalex.org/I19820366","display_name":"Chinese Academy of Sciences","ror":"https://ror.org/034t30j35","country_code":"CN","type":"government","lineage":["https://openalex.org/I19820366"]},{"id":"https://openalex.org/I4210090176","display_name":"Institute of Computing Technology","ror":"https://ror.org/0090r4d87","country_code":"CN","type":"facility","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210090176"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xinjiang Zhang","raw_affiliation_strings":["State Key Lab of Processors, Institute of Computing Technology, Chinese Academy of Sciences, Beijing, China"],"affiliations":[{"raw_affiliation_string":"State Key Lab of Processors, Institute of Computing Technology, Chinese Academy of Sciences, Beijing, China","institution_ids":["https://openalex.org/I4210090176","https://openalex.org/I19820366"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Yinhe Han","orcid":"https://orcid.org/0000-0003-0904-6681"},"institutions":[{"id":"https://openalex.org/I19820366","display_name":"Chinese Academy of Sciences","ror":"https://ror.org/034t30j35","country_code":"CN","type":"government","lineage":["https://openalex.org/I19820366"]},{"id":"https://openalex.org/I4210090176","display_name":"Institute of Computing Technology","ror":"https://ror.org/0090r4d87","country_code":"CN","type":"facility","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210090176"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yinhe Han","raw_affiliation_strings":["State Key Lab of Processors, Institute of Computing Technology, Chinese Academy of Sciences, Beijing, China"],"affiliations":[{"raw_affiliation_string":"State Key Lab of Processors, Institute of Computing Technology, Chinese Academy of Sciences, Beijing, China","institution_ids":["https://openalex.org/I4210090176","https://openalex.org/I19820366"]}]},{"author_position":"last","author":{"id":null,"display_name":"Xiaoming Chen","orcid":"https://orcid.org/0000-0002-7337-1844"},"institutions":[{"id":"https://openalex.org/I19820366","display_name":"Chinese Academy of Sciences","ror":"https://ror.org/034t30j35","country_code":"CN","type":"government","lineage":["https://openalex.org/I19820366"]},{"id":"https://openalex.org/I4210090176","display_name":"Institute of Computing Technology","ror":"https://ror.org/0090r4d87","country_code":"CN","type":"facility","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210090176"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xiaoming Chen","raw_affiliation_strings":["State Key Lab of Processors, Institute of Computing Technology, Chinese Academy of Sciences, Beijing, China"],"affiliations":[{"raw_affiliation_string":"State Key Lab of Processors, Institute of Computing Technology, Chinese Academy of Sciences, Beijing, China","institution_ids":["https://openalex.org/I4210090176","https://openalex.org/I19820366"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":8,"corresponding_author_ids":[],"corresponding_institution_ids":["https://openalex.org/I19820366","https://openalex.org/I4210090176"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.63304033,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":"24","issue":"2","first_page":"321","last_page":"324"},"is_retracted":false,"is_paratext":false,"is_xpac":true,"primary_topic":{"id":"https://openalex.org/T10021","display_name":"EFL/ESL Teaching and Learning","score":0.43209999799728394,"subfield":{"id":"https://openalex.org/subfields/1203","display_name":"Language and Linguistics"},"field":{"id":"https://openalex.org/fields/12","display_name":"Arts and Humanities"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},"topics":[{"id":"https://openalex.org/T10021","display_name":"EFL/ESL Teaching and Learning","score":0.43209999799728394,"subfield":{"id":"https://openalex.org/subfields/1203","display_name":"Language and Linguistics"},"field":{"id":"https://openalex.org/fields/12","display_name":"Arts and Humanities"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T11587","display_name":"Second Language Acquisition and Learning","score":0.07750000059604645,"subfield":{"id":"https://openalex.org/subfields/3204","display_name":"Developmental and Educational Psychology"},"field":{"id":"https://openalex.org/fields/32","display_name":"Psychology"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T14037","display_name":"Arabic Language Education Studies","score":0.028999999165534973,"subfield":{"id":"https://openalex.org/subfields/3304","display_name":"Education"},"field":{"id":"https://openalex.org/fields/33","display_name":"Social Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/edge-device","display_name":"Edge device","score":0.746399998664856},{"id":"https://openalex.org/keywords/leverage","display_name":"Leverage (statistics)","score":0.6456000208854675},{"id":"https://openalex.org/keywords/edge-computing","display_name":"Edge computing","score":0.5978999733924866},{"id":"https://openalex.org/keywords/enhanced-data-rates-for-gsm-evolution","display_name":"Enhanced Data Rates for GSM Evolution","score":0.5464000105857849},{"id":"https://openalex.org/keywords/inference","display_name":"Inference","score":0.5268999934196472},{"id":"https://openalex.org/keywords/computation","display_name":"Computation","score":0.5216000080108643},{"id":"https://openalex.org/keywords/resistive-touchscreen","display_name":"Resistive touchscreen","score":0.48399999737739563},{"id":"https://openalex.org/keywords/quantization","display_name":"Quantization (signal processing)","score":0.42500001192092896},{"id":"https://openalex.org/keywords/memory-bandwidth","display_name":"Memory bandwidth","score":0.39649999141693115}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.824400007724762},{"id":"https://openalex.org/C138236772","wikidata":"https://www.wikidata.org/wiki/Q25098575","display_name":"Edge device","level":3,"score":0.746399998664856},{"id":"https://openalex.org/C153083717","wikidata":"https://www.wikidata.org/wiki/Q6535263","display_name":"Leverage (statistics)","level":2,"score":0.6456000208854675},{"id":"https://openalex.org/C2778456923","wikidata":"https://www.wikidata.org/wiki/Q5337692","display_name":"Edge computing","level":3,"score":0.5978999733924866},{"id":"https://openalex.org/C162307627","wikidata":"https://www.wikidata.org/wiki/Q204833","display_name":"Enhanced Data Rates for GSM Evolution","level":2,"score":0.5464000105857849},{"id":"https://openalex.org/C2776214188","wikidata":"https://www.wikidata.org/wiki/Q408386","display_name":"Inference","level":2,"score":0.5268999934196472},{"id":"https://openalex.org/C45374587","wikidata":"https://www.wikidata.org/wiki/Q12525525","display_name":"Computation","level":2,"score":0.5216000080108643},{"id":"https://openalex.org/C6899612","wikidata":"https://www.wikidata.org/wiki/Q852911","display_name":"Resistive touchscreen","level":2,"score":0.48399999737739563},{"id":"https://openalex.org/C113775141","wikidata":"https://www.wikidata.org/wiki/Q428691","display_name":"Computer engineering","level":1,"score":0.4426000118255615},{"id":"https://openalex.org/C28855332","wikidata":"https://www.wikidata.org/wiki/Q198099","display_name":"Quantization (signal processing)","level":2,"score":0.42500001192092896},{"id":"https://openalex.org/C188045654","wikidata":"https://www.wikidata.org/wiki/Q17148339","display_name":"Memory bandwidth","level":2,"score":0.39649999141693115},{"id":"https://openalex.org/C79974875","wikidata":"https://www.wikidata.org/wiki/Q483639","display_name":"Cloud computing","level":2,"score":0.38029998540878296},{"id":"https://openalex.org/C2776257435","wikidata":"https://www.wikidata.org/wiki/Q1576430","display_name":"Bandwidth (computing)","level":2,"score":0.3785000145435333},{"id":"https://openalex.org/C82876162","wikidata":"https://www.wikidata.org/wiki/Q17096504","display_name":"Latency (audio)","level":2,"score":0.35530000925064087},{"id":"https://openalex.org/C9390403","wikidata":"https://www.wikidata.org/wiki/Q3966","display_name":"Computer hardware","level":1,"score":0.3149999976158142},{"id":"https://openalex.org/C509933004","wikidata":"https://www.wikidata.org/wiki/Q194163","display_name":"Broadband","level":2,"score":0.2858000099658966},{"id":"https://openalex.org/C118524514","wikidata":"https://www.wikidata.org/wiki/Q173212","display_name":"Computer architecture","level":1,"score":0.28060001134872437},{"id":"https://openalex.org/C149635348","wikidata":"https://www.wikidata.org/wiki/Q193040","display_name":"Embedded system","level":1,"score":0.2777000069618225},{"id":"https://openalex.org/C195818886","wikidata":"https://www.wikidata.org/wiki/Q5421724","display_name":"Expressive power","level":2,"score":0.26570001244544983},{"id":"https://openalex.org/C13164978","wikidata":"https://www.wikidata.org/wiki/Q600158","display_name":"Hardware acceleration","level":3,"score":0.26489999890327454},{"id":"https://openalex.org/C163258240","wikidata":"https://www.wikidata.org/wiki/Q25342","display_name":"Power (physics)","level":2,"score":0.26260000467300415},{"id":"https://openalex.org/C131017901","wikidata":"https://www.wikidata.org/wiki/Q170451","display_name":"Logic gate","level":2,"score":0.26080000400543213},{"id":"https://openalex.org/C189930140","wikidata":"https://www.wikidata.org/wiki/Q1112878","display_name":"CAS latency","level":4,"score":0.2581999897956848},{"id":"https://openalex.org/C49289754","wikidata":"https://www.wikidata.org/wiki/Q2267081","display_name":"Side channel attack","level":3,"score":0.25780001282691956},{"id":"https://openalex.org/C182019814","wikidata":"https://www.wikidata.org/wiki/Q1143830","display_name":"Resistive random-access memory","level":3,"score":0.2565999925136566},{"id":"https://openalex.org/C2994168587","wikidata":"https://www.wikidata.org/wiki/Q5295","display_name":"Random access memory","level":2,"score":0.25209999084472656}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/lca.2025.3618104","is_oa":false,"landing_page_url":"https://doi.org/10.1109/lca.2025.3618104","pdf_url":null,"source":{"id":"https://openalex.org/S17643076","display_name":"IEEE Computer Architecture Letters","issn_l":"1556-6056","issn":["1556-6056","1556-6064","2473-2575"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Computer Architecture Letters","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G1870629342","display_name":null,"funder_award_id":"62495104","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G4444615682","display_name":null,"funder_award_id":"62025404","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G4906109994","display_name":null,"funder_award_id":"62488101","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"},{"id":"https://openalex.org/F4320322847","display_name":"Youth Innovation Promotion Association of the Chinese Academy of Sciences","ror":"https://ror.org/031141b54"}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":6,"referenced_works":["https://openalex.org/W4220972538","https://openalex.org/W4386980061","https://openalex.org/W4394698423","https://openalex.org/W4395073431","https://openalex.org/W4402627654","https://openalex.org/W4411485931"],"related_works":[],"abstract_inverted_index":{"Deploying":[0],"large":[1],"language":[2],"models":[3],"(LLMs)":[4],"on":[5],"edge":[6,25,36,78,103,144],"devices":[7,26],"has":[8],"the":[9,19,31,86,133],"potentials":[10],"of":[11,23,35,88],"low-latency":[12,102,143],"inference":[13],"and":[14,57,71,120,155,162],"privacy":[15],"protection.":[16],"However,":[17],"meeting":[18],"substantial":[20],"bandwidth":[21],"demands":[22],"latency-oriented":[24],"is":[27,44,73],"challenging":[28],"due":[29],"to":[30,52],"strict":[32],"power":[33,56],"constraints":[34],"devices.":[37],"Resistive":[38],"random-access":[39,127],"memory":[40,128],"(RRAM)-based":[41],"processing-in-memory":[42],"(PIM)":[43],"an":[45,107,140],"ideal":[46],"solution":[47],"for":[48,69,97,131,142],"this":[49],"challenge,":[50],"thanks":[51],"its":[53],"low":[54],"read":[55],"high":[58],"internal":[59],"bandwidth.":[60],"Moreover,":[61],"applying":[62],"quantization":[63],"methods,":[64],"which":[65],"require":[66],"different":[67],"precisions":[68],"weights":[70],"activations,":[72],"a":[74,125,160],"common":[75],"practice":[76],"in":[77],"inference.":[79,146],"But":[80],"existing":[81],"accelerators":[82],"cannot":[83],"fully":[84],"leverage":[85],"benefits":[87],"quantization,":[89],"as":[90],"they":[91],"lack":[92],"multiply-accumulate":[93],"(MAC)":[94],"units":[95],"optimized":[96],"mixed-precision":[98],"operands.":[99],"To":[100],"achieve":[101],"inference,":[104],"we":[105,137],"design":[106],"RRAM-based":[108],"PIM":[109,165],"die":[110,130],"that":[111,150],"integrates":[112],"dedicated":[113],"energy-efficient":[114],"MAC":[115],"units,":[116],"providing":[117],"both":[118],"computation":[119],"storage":[121],"capabilities.":[122],"Coupled":[123],"with":[124],"dynamic":[126],"(DRAM)":[129],"storing":[132],"key-value":[134],"(KV)":[135],"cache,":[136],"propose":[138],"Lyla,":[139],"accelerator":[141],"LLM":[145],"Experimental":[147],"results":[148],"show":[149],"Lyla":[151],"achieves":[152],"3.8\u00d7,":[153],"2.4\u00d7,":[154],"1.2\u00d7":[156],"latency":[157],"improvements":[158],"over":[159],"GPU":[161],"two":[163],"DRAM-based":[164],"accelerators,":[166],"respectively.":[167]},"counts_by_year":[],"updated_date":"2026-04-09T08:11:56.329763","created_date":"2025-10-10T00:00:00"}
