{"id":"https://openalex.org/W7117596669","doi":"https://doi.org/10.1109/iccd65941.2025.00038","title":"PriME: PIM-Aware Efficient Compression for Memory-Bound Embedding Layers in sLLMs","display_name":"PriME: PIM-Aware Efficient Compression for Memory-Bound Embedding Layers in sLLMs","publication_year":2025,"publication_date":"2025-11-10","ids":{"openalex":"https://openalex.org/W7117596669","doi":"https://doi.org/10.1109/iccd65941.2025.00038"},"language":null,"primary_location":{"id":"doi:10.1109/iccd65941.2025.00038","is_oa":false,"landing_page_url":"https://doi.org/10.1109/iccd65941.2025.00038","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 IEEE 43rd International Conference on Computer Design (ICCD)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5016688645","display_name":"J. Lee","orcid":null},"institutions":[{"id":"https://openalex.org/I118373667","display_name":"Seoul National University of Science and Technology","ror":"https://ror.org/00chfja07","country_code":"KR","type":"education","lineage":["https://openalex.org/I118373667"]}],"countries":["KR"],"is_corresponding":false,"raw_author_name":"Junghyeok Lee","raw_affiliation_strings":["Seoul National University of Science and Technology,Department of Electrical and Information Engineering and Research Center for Electrical and Information Technology,Seoul,Korea"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Seoul National University of Science and Technology,Department of Electrical and Information Engineering and Research Center for Electrical and Information Technology,Seoul,Korea","institution_ids":["https://openalex.org/I118373667"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5062999054","display_name":"J. S. Jang","orcid":null},"institutions":[{"id":"https://openalex.org/I118373667","display_name":"Seoul National University of Science and Technology","ror":"https://ror.org/00chfja07","country_code":"KR","type":"education","lineage":["https://openalex.org/I118373667"]}],"countries":["KR"],"is_corresponding":false,"raw_author_name":"Jihoon Jang","raw_affiliation_strings":["Seoul National University of Science and Technology,Department of Electrical and Information Engineering and Research Center for Electrical and Information Technology,Seoul,Korea"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Seoul National University of Science and Technology,Department of Electrical and Information Engineering and Research Center for Electrical and Information Technology,Seoul,Korea","institution_ids":["https://openalex.org/I118373667"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5030004296","display_name":"Hyun\u2010Sik Kim","orcid":"https://orcid.org/0000-0001-8934-4042"},"institutions":[{"id":"https://openalex.org/I118373667","display_name":"Seoul National University of Science and Technology","ror":"https://ror.org/00chfja07","country_code":"KR","type":"education","lineage":["https://openalex.org/I118373667"]}],"countries":["KR"],"is_corresponding":false,"raw_author_name":"Hyun Kim","raw_affiliation_strings":["Seoul National University of Science and Technology,Department of Electrical and Information Engineering and Research Center for Electrical and Information Technology,Seoul,Korea"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Seoul National University of Science and Technology,Department of Electrical and Information Engineering and Research Center for Electrical and Information Technology,Seoul,Korea","institution_ids":["https://openalex.org/I118373667"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":1,"corresponding_author_ids":[],"corresponding_institution_ids":["https://openalex.org/I118373667"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.61318458,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"221","last_page":"228"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.2378000020980835,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.2378000020980835,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12808","display_name":"Ferroelectric and Negative Capacitance Devices","score":0.18729999661445618,"subfield":{"id":"https://openalex.org/subfields/2208","display_name":"Electrical and Electronic Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.12359999865293503,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/embedding","display_name":"Embedding","score":0.7998999953269958},{"id":"https://openalex.org/keywords/lossless-compression","display_name":"Lossless compression","score":0.6869999766349792},{"id":"https://openalex.org/keywords/scalability","display_name":"Scalability","score":0.6283000111579895},{"id":"https://openalex.org/keywords/data-compression","display_name":"Data compression","score":0.4950000047683716},{"id":"https://openalex.org/keywords/data-compression-ratio","display_name":"Data compression ratio","score":0.4832000136375427},{"id":"https://openalex.org/keywords/efficient-energy-use","display_name":"Efficient energy use","score":0.4652999937534332},{"id":"https://openalex.org/keywords/computation","display_name":"Computation","score":0.41830000281333923},{"id":"https://openalex.org/keywords/quantization","display_name":"Quantization (signal processing)","score":0.3610000014305115},{"id":"https://openalex.org/keywords/von-neumann-architecture","display_name":"Von Neumann architecture","score":0.3508000075817108}],"concepts":[{"id":"https://openalex.org/C41608201","wikidata":"https://www.wikidata.org/wiki/Q980509","display_name":"Embedding","level":2,"score":0.7998999953269958},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7161999940872192},{"id":"https://openalex.org/C81081738","wikidata":"https://www.wikidata.org/wiki/Q55542","display_name":"Lossless compression","level":3,"score":0.6869999766349792},{"id":"https://openalex.org/C48044578","wikidata":"https://www.wikidata.org/wiki/Q727490","display_name":"Scalability","level":2,"score":0.6283000111579895},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.49810001254081726},{"id":"https://openalex.org/C78548338","wikidata":"https://www.wikidata.org/wiki/Q2493","display_name":"Data compression","level":2,"score":0.4950000047683716},{"id":"https://openalex.org/C94835093","wikidata":"https://www.wikidata.org/wiki/Q3113333","display_name":"Data compression ratio","level":5,"score":0.4832000136375427},{"id":"https://openalex.org/C2742236","wikidata":"https://www.wikidata.org/wiki/Q924713","display_name":"Efficient energy use","level":2,"score":0.4652999937534332},{"id":"https://openalex.org/C45374587","wikidata":"https://www.wikidata.org/wiki/Q12525525","display_name":"Computation","level":2,"score":0.41830000281333923},{"id":"https://openalex.org/C80444323","wikidata":"https://www.wikidata.org/wiki/Q2878974","display_name":"Theoretical computer science","level":1,"score":0.3736000061035156},{"id":"https://openalex.org/C28855332","wikidata":"https://www.wikidata.org/wiki/Q198099","display_name":"Quantization (signal processing)","level":2,"score":0.3610000014305115},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.3546000123023987},{"id":"https://openalex.org/C80469333","wikidata":"https://www.wikidata.org/wiki/Q189088","display_name":"Von Neumann architecture","level":2,"score":0.3508000075817108},{"id":"https://openalex.org/C113775141","wikidata":"https://www.wikidata.org/wiki/Q428691","display_name":"Computer engineering","level":1,"score":0.3483000099658966},{"id":"https://openalex.org/C205711294","wikidata":"https://www.wikidata.org/wiki/Q176953","display_name":"Rendering (computer graphics)","level":2,"score":0.3476000130176544},{"id":"https://openalex.org/C152124472","wikidata":"https://www.wikidata.org/wiki/Q1204361","display_name":"Redundancy (engineering)","level":2,"score":0.3465999960899353},{"id":"https://openalex.org/C57273362","wikidata":"https://www.wikidata.org/wiki/Q576722","display_name":"Decoding methods","level":2,"score":0.326200008392334},{"id":"https://openalex.org/C13481523","wikidata":"https://www.wikidata.org/wiki/Q412438","display_name":"Image compression","level":4,"score":0.30979999899864197},{"id":"https://openalex.org/C25797200","wikidata":"https://www.wikidata.org/wiki/Q828137","display_name":"Compression ratio","level":3,"score":0.29820001125335693},{"id":"https://openalex.org/C2780165032","wikidata":"https://www.wikidata.org/wiki/Q16869822","display_name":"Energy consumption","level":2,"score":0.29030001163482666},{"id":"https://openalex.org/C138236772","wikidata":"https://www.wikidata.org/wiki/Q25098575","display_name":"Edge device","level":3,"score":0.28630000352859497},{"id":"https://openalex.org/C2778192920","wikidata":"https://www.wikidata.org/wiki/Q16874989","display_name":"Signal compression","level":4,"score":0.2840000092983246},{"id":"https://openalex.org/C3073032","wikidata":"https://www.wikidata.org/wiki/Q15912075","display_name":"Information hiding","level":3,"score":0.2827000021934509},{"id":"https://openalex.org/C2777402240","wikidata":"https://www.wikidata.org/wiki/Q6783436","display_name":"Masking (illustration)","level":2,"score":0.28209999203681946},{"id":"https://openalex.org/C46900642","wikidata":"https://www.wikidata.org/wiki/Q2647","display_name":"Huffman coding","level":3,"score":0.2759000062942505},{"id":"https://openalex.org/C153338461","wikidata":"https://www.wikidata.org/wiki/Q2651","display_name":"Arithmetic coding","level":4,"score":0.274399995803833},{"id":"https://openalex.org/C180016635","wikidata":"https://www.wikidata.org/wiki/Q2712821","display_name":"Compression (physics)","level":2,"score":0.27129998803138733},{"id":"https://openalex.org/C459310","wikidata":"https://www.wikidata.org/wiki/Q117801","display_name":"Computational science","level":1,"score":0.26600000262260437},{"id":"https://openalex.org/C118505674","wikidata":"https://www.wikidata.org/wiki/Q42586063","display_name":"Encoder","level":2,"score":0.2615000009536743},{"id":"https://openalex.org/C9390403","wikidata":"https://www.wikidata.org/wiki/Q3966","display_name":"Computer hardware","level":1,"score":0.25459998846054077},{"id":"https://openalex.org/C165435473","wikidata":"https://www.wikidata.org/wiki/Q1509884","display_name":"Padding","level":2,"score":0.2531000077724457}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/iccd65941.2025.00038","is_oa":false,"landing_page_url":"https://doi.org/10.1109/iccd65941.2025.00038","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 IEEE 43rd International Conference on Computer Design (ICCD)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":27,"referenced_works":["https://openalex.org/W1976494130","https://openalex.org/W2082375193","https://openalex.org/W2114668191","https://openalex.org/W2147657598","https://openalex.org/W2162752393","https://openalex.org/W2516109628","https://openalex.org/W2774409802","https://openalex.org/W2790546557","https://openalex.org/W2798724095","https://openalex.org/W2963347649","https://openalex.org/W2979826702","https://openalex.org/W2982008795","https://openalex.org/W3099413717","https://openalex.org/W3102510044","https://openalex.org/W3133253223","https://openalex.org/W3189166979","https://openalex.org/W4221001402","https://openalex.org/W4250043344","https://openalex.org/W4280582964","https://openalex.org/W4399376085","https://openalex.org/W4400811646","https://openalex.org/W4401024916","https://openalex.org/W4409347104","https://openalex.org/W4411486388","https://openalex.org/W4411725693","https://openalex.org/W4412887059","https://openalex.org/W4415799245"],"related_works":[],"abstract_inverted_index":{"With":[0],"the":[1,36,44,52,79],"growing":[2],"demand":[3],"for":[4,128,231,243,254],"on-device":[5],"AI,":[6],"increasing":[7],"efforts":[8],"have":[9],"been":[10],"directed":[11],"toward":[12],"deploying":[13],"lightweight":[14],"small-scale":[15],"large":[16],"language":[17,249],"models":[18],"(sLLMs)":[19],"on":[20],"edge":[21],"and":[22,33,56,71,132,158,199,240],"mobile":[23],"devices":[24],"to":[25,78,107,141,201,207,219,245],"enhance":[26],"inference":[27],"performance":[28],"while":[29,171,209],"minimizing":[30],"computational":[31],"cost":[32],"latency.":[34],"As":[35],"number":[37],"of":[38,51,166,183,194,225],"decoder":[39],"layers":[40],"in":[41,145,152],"sLLMs":[42,239],"decreases,":[43],"embedding":[45,81,84,130,146,189],"layer":[46,85],"constitutes":[47],"a":[48,100,123,175],"substantial":[49],"portion":[50],"model's":[53],"overall":[54],"parameters":[55],"memory":[57,143],"consumption.":[58],"Consequently,":[59],"efficient":[60,255],"data":[61,155,185],"compression":[62,125,164],"is":[63,235],"crucial;":[64],"however,":[65],"existing":[66],"methods,":[67],"such":[68],"as":[69],"quantization":[70],"pruning,":[72],"drastically":[73],"degrade":[74],"accuracy":[75],"when":[76],"applied":[77],"error-sensitive":[80],"layers.":[82,147],"Moreover,":[83],"computations":[86,190],"exhibit":[87],"low":[88],"arithmetic":[89],"intensity":[90],"(operations":[91],"per":[92],"byte),":[93],"rendering":[94],"them":[95],"memory-bound.":[96],"This":[97],"limitation":[98],"necessitates":[99],"shift":[101],"from":[102],"conventional":[103],"von":[104],"Neumann":[105],"architectures":[106],"processing-in-memory":[108],"(PIM)":[109],"architectures.":[110],"To":[111],"address":[112],"these":[113],"challenges,":[114],"this":[115],"paper":[116],"proposes":[117],"(1)":[118],"XOR-based":[119],"Masking":[120],"Compression":[121],"(XMC),":[122],"lossless":[124],"algorithm":[126],"specialized":[127],"sLLM":[129],"layers,":[131],"(2)":[133],"PriME,":[134],"which":[135],"integrates":[136],"XMC":[137,148],"with":[138,174,237],"PIM":[139],"architecture":[140],"alleviate":[142],"bottlenecks":[144],"enhances":[149],"zero-bit":[150],"representation":[151],"16-bit":[153],"FP":[154],"using":[156],"ADD":[157],"XOR":[159],"masking,":[160],"achieving":[161],"an":[162,192,220],"average":[163,193,221],"ratio":[165],"<tex":[167,195,202,226],"xmlns:mml=\"http://www.w3.org/1998/Math/MathML\"":[168,196,203,227],"xmlns:xlink=\"http://www.w3.org/1999/xlink\">$1.49":[169],"\\times$</tex>":[170,198,205],"being":[172],"implementable":[173],"3-cycle":[176],"decompression":[177],"delay.":[178],"PriME":[179,234],"enables":[180],"parallel":[181],"processing":[182],"compressed":[184],"within":[186],"PIM,":[187],"accelerating":[188],"by":[191,214],"xmlns:xlink=\"http://www.w3.org/1999/xlink\">$4.0":[197],"up":[200],"xmlns:xlink=\"http://www.w3.org/1999/xlink\">$5.26":[204],"compared":[206],"GPUs,":[208],"simultaneously":[210],"reducing":[211],"energy":[212,222],"consumption":[213],"over":[215],"30":[216],"%,":[217],"leading":[218],"efficiency":[223],"improvement":[224],"xmlns:xlink=\"http://www.w3.org/1999/xlink\">$6.29":[228],"\\times$</tex>.":[229],"Designed":[230],"broad":[232],"applicability,":[233],"compatible":[236],"various":[238],"holds":[241],"scalability":[242],"extension":[244],"multimodal":[246],"small":[247],"vision":[248],"models,":[250],"demonstrating":[251],"its":[252],"versatility":[253],"AI":[256],"acceleration.":[257]},"counts_by_year":[],"updated_date":"2026-06-26T08:34:08.712188","created_date":"2025-12-30T00:00:00"}
