{"id":"https://openalex.org/W4414197154","doi":"https://doi.org/10.1109/dac63849.2025.11133264","title":"VQT-CiM: Accelerating Vector Quantization Enhanced Transformer with Ferroelectric Compute-in-Memory","display_name":"VQT-CiM: Accelerating Vector Quantization Enhanced Transformer with Ferroelectric Compute-in-Memory","publication_year":2025,"publication_date":"2025-06-22","ids":{"openalex":"https://openalex.org/W4414197154","doi":"https://doi.org/10.1109/dac63849.2025.11133264"},"language":"en","primary_location":{"id":"doi:10.1109/dac63849.2025.11133264","is_oa":false,"landing_page_url":"https://doi.org/10.1109/dac63849.2025.11133264","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 62nd ACM/IEEE Design Automation Conference (DAC)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5101476409","display_name":"Xuchu Huang","orcid":"https://orcid.org/0000-0001-5900-2455"},"institutions":[{"id":"https://openalex.org/I76130692","display_name":"Zhejiang University","ror":"https://ror.org/00a2xv884","country_code":"CN","type":"education","lineage":["https://openalex.org/I76130692"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Xuchu Huang","raw_affiliation_strings":["Zhejiang University,Hangzhou,China"],"affiliations":[{"raw_affiliation_string":"Zhejiang University,Hangzhou,China","institution_ids":["https://openalex.org/I76130692"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5086245605","display_name":"Haonan Du","orcid":"https://orcid.org/0009-0003-6345-8991"},"institutions":[{"id":"https://openalex.org/I76130692","display_name":"Zhejiang University","ror":"https://ror.org/00a2xv884","country_code":"CN","type":"education","lineage":["https://openalex.org/I76130692"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Haonan Du","raw_affiliation_strings":["Zhejiang University,Hangzhou,China"],"affiliations":[{"raw_affiliation_string":"Zhejiang University,Hangzhou,China","institution_ids":["https://openalex.org/I76130692"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5071100651","display_name":"Min Zhou","orcid":"https://orcid.org/0000-0003-2677-5472"},"institutions":[{"id":"https://openalex.org/I76130692","display_name":"Zhejiang University","ror":"https://ror.org/00a2xv884","country_code":"CN","type":"education","lineage":["https://openalex.org/I76130692"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Min Zhou","raw_affiliation_strings":["Zhejiang University,Hangzhou,China"],"affiliations":[{"raw_affiliation_string":"Zhejiang University,Hangzhou,China","institution_ids":["https://openalex.org/I76130692"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5059093077","display_name":"Zheyu Yan","orcid":"https://orcid.org/0000-0003-1830-606X"},"institutions":[{"id":"https://openalex.org/I76130692","display_name":"Zhejiang University","ror":"https://ror.org/00a2xv884","country_code":"CN","type":"education","lineage":["https://openalex.org/I76130692"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Zheyu Yan","raw_affiliation_strings":["Zhejiang University,Hangzhou,China"],"affiliations":[{"raw_affiliation_string":"Zhejiang University,Hangzhou,China","institution_ids":["https://openalex.org/I76130692"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5054211420","display_name":"Cheng Zhuo","orcid":"https://orcid.org/0000-0002-2610-7522"},"institutions":[{"id":"https://openalex.org/I76130692","display_name":"Zhejiang University","ror":"https://ror.org/00a2xv884","country_code":"CN","type":"education","lineage":["https://openalex.org/I76130692"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Cheng Zhuo","raw_affiliation_strings":["Zhejiang University,Hangzhou,China"],"affiliations":[{"raw_affiliation_string":"Zhejiang University,Hangzhou,China","institution_ids":["https://openalex.org/I76130692"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5071383109","display_name":"Xunzhao Yin","orcid":"https://orcid.org/0000-0003-4656-9545"},"institutions":[{"id":"https://openalex.org/I76130692","display_name":"Zhejiang University","ror":"https://ror.org/00a2xv884","country_code":"CN","type":"education","lineage":["https://openalex.org/I76130692"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xunzhao Yin","raw_affiliation_strings":["Zhejiang University,Hangzhou,China"],"affiliations":[{"raw_affiliation_string":"Zhejiang University,Hangzhou,China","institution_ids":["https://openalex.org/I76130692"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5101476409"],"corresponding_institution_ids":["https://openalex.org/I76130692"],"apc_list":null,"apc_paid":null,"fwci":0.926,"has_fulltext":false,"cited_by_count":1,"citation_normalized_percentile":{"value":0.79963937,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":97,"max":99},"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"7"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12702","display_name":"Brain Tumor Detection and Classification","score":0.9556000232696533,"subfield":{"id":"https://openalex.org/subfields/2808","display_name":"Neurology"},"field":{"id":"https://openalex.org/fields/28","display_name":"Neuroscience"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},"topics":[{"id":"https://openalex.org/T12702","display_name":"Brain Tumor Detection and Classification","score":0.9556000232696533,"subfield":{"id":"https://openalex.org/subfields/2808","display_name":"Neurology"},"field":{"id":"https://openalex.org/fields/28","display_name":"Neuroscience"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},{"id":"https://openalex.org/T10320","display_name":"Neural Networks and Applications","score":0.9204000234603882,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/transformer","display_name":"Transformer","score":0.6902999877929688},{"id":"https://openalex.org/keywords/quantization","display_name":"Quantization (signal processing)","score":0.459199994802475},{"id":"https://openalex.org/keywords/vector-quantization","display_name":"Vector quantization","score":0.4221000075340271},{"id":"https://openalex.org/keywords/efficient-energy-use","display_name":"Efficient energy use","score":0.38440001010894775},{"id":"https://openalex.org/keywords/preprocessor","display_name":"Preprocessor","score":0.3179999887943268},{"id":"https://openalex.org/keywords/upgrade","display_name":"Upgrade","score":0.27900001406669617}],"concepts":[{"id":"https://openalex.org/C66322947","wikidata":"https://www.wikidata.org/wiki/Q11658","display_name":"Transformer","level":3,"score":0.6902999877929688},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.623199999332428},{"id":"https://openalex.org/C28855332","wikidata":"https://www.wikidata.org/wiki/Q198099","display_name":"Quantization (signal processing)","level":2,"score":0.459199994802475},{"id":"https://openalex.org/C199833920","wikidata":"https://www.wikidata.org/wiki/Q612536","display_name":"Vector quantization","level":2,"score":0.4221000075340271},{"id":"https://openalex.org/C24326235","wikidata":"https://www.wikidata.org/wiki/Q126095","display_name":"Electronic engineering","level":1,"score":0.3910999894142151},{"id":"https://openalex.org/C2742236","wikidata":"https://www.wikidata.org/wiki/Q924713","display_name":"Efficient energy use","level":2,"score":0.38440001010894775},{"id":"https://openalex.org/C9390403","wikidata":"https://www.wikidata.org/wiki/Q3966","display_name":"Computer hardware","level":1,"score":0.37119999527931213},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.3434000015258789},{"id":"https://openalex.org/C113775141","wikidata":"https://www.wikidata.org/wiki/Q428691","display_name":"Computer engineering","level":1,"score":0.34139999747276306},{"id":"https://openalex.org/C34736171","wikidata":"https://www.wikidata.org/wiki/Q918333","display_name":"Preprocessor","level":2,"score":0.3179999887943268},{"id":"https://openalex.org/C2780615140","wikidata":"https://www.wikidata.org/wiki/Q920419","display_name":"Upgrade","level":2,"score":0.27900001406669617},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.2687999904155731},{"id":"https://openalex.org/C152060993","wikidata":"https://www.wikidata.org/wiki/Q1759392","display_name":"Vector control","level":4,"score":0.2614000141620636},{"id":"https://openalex.org/C84462506","wikidata":"https://www.wikidata.org/wiki/Q173142","display_name":"Digital signal processing","level":2,"score":0.25690001249313354},{"id":"https://openalex.org/C135402231","wikidata":"https://www.wikidata.org/wiki/Q898440","display_name":"Dissipation","level":2,"score":0.25200000405311584}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/dac63849.2025.11133264","is_oa":false,"landing_page_url":"https://doi.org/10.1109/dac63849.2025.11133264","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 62nd ACM/IEEE Design Automation Conference (DAC)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":27,"referenced_works":["https://openalex.org/W2792893539","https://openalex.org/W2899077824","https://openalex.org/W2923014074","https://openalex.org/W2963323070","https://openalex.org/W2963748441","https://openalex.org/W2974585810","https://openalex.org/W2979826702","https://openalex.org/W2988640543","https://openalex.org/W2990591126","https://openalex.org/W3005619596","https://openalex.org/W3136522402","https://openalex.org/W3209892756","https://openalex.org/W3215615641","https://openalex.org/W4225916833","https://openalex.org/W4244017338","https://openalex.org/W4312847929","https://openalex.org/W4362700144","https://openalex.org/W4386108348","https://openalex.org/W4388838139","https://openalex.org/W4389166736","https://openalex.org/W4390017976","https://openalex.org/W4390241252","https://openalex.org/W4390873361","https://openalex.org/W4392897316","https://openalex.org/W4399376491","https://openalex.org/W4404102453","https://openalex.org/W4404134084"],"related_works":[],"abstract_inverted_index":{"Transformer":[0],"models":[1],"have":[2,39],"achieved":[3],"state-of-the-art":[4,217],"performance":[5],"in":[6,50,111,117,184,209],"various":[7],"natural":[8],"language":[9],"processing":[10],"(NLP)":[11],"and":[12,33,75,109,119,163,192,205,212],"computer":[13],"vision":[14],"(CV)":[15],"tasks.":[16],"To":[17,149],"meet":[18],"their":[19,79],"substantial":[20],"computational":[21],"demands,":[22],"the":[23,29,46,51,102,125,138,178,188,202],"compute-in-memory":[24],"(CiM)":[25],"architectures,":[26],"which":[27,54,186],"alleviate":[28],"memory":[30,64],"wall":[31],"problem":[32],"enable":[34],"efficient":[35,128,174],"vector-matrix":[36],"multiplication":[37],"(VMM),":[38],"been":[40],"adopted":[41],"for":[42,62,167,177],"transformer":[43,141,220],"accelerators.":[44],"However,":[45,133],"dynamic":[47,115],"VMM":[48],"involved":[49],"attention":[52],"mechanism,":[53],"necessitates":[55],"runtime":[56,103],"write":[57,69,104],"operations,":[58],"presents":[59],"significant":[60],"challenges":[61],"non-volatile":[63],"(NVM)-based":[65],"CiM":[66,91,131,190,219],"designs.":[67,221],"High":[68],"overhead,":[70],"complex":[71],"compute-write-compute":[72],"(CWC)":[73],"dependencies,":[74],"limited":[76,146],"endurance":[77],"reduce":[78],"effectiveness.":[80],"In":[81],"this":[82],"paper,":[83],"we":[84,152],"propose":[85],"VQT-CiM,":[86],"a":[87,154],"ferroelectric":[88],"FET":[89],"(FeFET)-based":[90],"design":[92],"that":[93,158,199],"accelerates":[94],"vector":[95,155],"quantization":[96,156],"(VQ)":[97],"enhanced":[98,168],"transformers":[99],"by":[100],"eliminating":[101],"operations.":[105],"VQT-CiM":[106,180,200],"quantizes":[107],"keys":[108],"values":[110],"self-attention":[112],"to":[113,144,216],"convert":[114],"VMMs":[116,123],"inner-product":[118],"weighted-sum":[120],"into":[121],"static":[122],"with":[124,130,181],"codebooks,":[126],"enabling":[127],"calculations":[129],"crossbars.":[132],"directly":[134],"applying":[135],"VQ":[136,161,165],"hinders":[137],"accuracy":[139],"of":[140],"model":[142],"due":[143],"its":[145],"representation":[147,169],"capability.":[148],"address":[150],"this,":[151],"introduce":[153],"scheme":[157],"integrates":[159],"residual":[160],"(RVQ)":[162],"product":[164],"(PVQ)":[166],"space.":[170],"We":[171],"present":[172],"an":[173],"hardware":[175],"implementation":[176],"proposed":[179],"optimized":[182],"dataflow":[183],"RVQ,":[185],"incorporates":[187],"FeFET-based":[189],"crossbars":[191],"peripheral":[193],"digital":[194],"circuits.":[195],"Evaluation":[196],"results":[197],"suggest":[198],"achieves":[201],"$3.54":[203],"\\times$":[204,207],"$4.53":[206],"improvements":[208],"energy":[210],"efficiency":[211],"throughput,":[213],"respectively,":[214],"compared":[215],"NVM-based":[218]},"counts_by_year":[{"year":2026,"cited_by_count":1}],"updated_date":"2026-03-12T08:34:05.389933","created_date":"2025-10-10T00:00:00"}
