{"id":"https://openalex.org/W4410204349","doi":"https://doi.org/10.1109/tcsi.2025.3561245","title":"SHMT: An SRAM and HBM Hybrid Computing-in-Memory Architecture With Optimized KV Cache for Multimodal Transformer","display_name":"SHMT: An SRAM and HBM Hybrid Computing-in-Memory Architecture With Optimized KV Cache for Multimodal Transformer","publication_year":2025,"publication_date":"2025-05-08","ids":{"openalex":"https://openalex.org/W4410204349","doi":"https://doi.org/10.1109/tcsi.2025.3561245"},"language":"en","primary_location":{"id":"doi:10.1109/tcsi.2025.3561245","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tcsi.2025.3561245","pdf_url":null,"source":{"id":"https://openalex.org/S116977442","display_name":"IEEE Transactions on Circuits and Systems I Regular Papers","issn_l":"1549-8328","issn":["1549-8328","1558-0806"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Circuits and Systems I: Regular Papers","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5005225432","display_name":"Xiangqu Fu","orcid":"https://orcid.org/0000-0001-7288-9302"},"institutions":[{"id":"https://openalex.org/I19820366","display_name":"Chinese Academy of Sciences","ror":"https://ror.org/034t30j35","country_code":"CN","type":"government","lineage":["https://openalex.org/I19820366"]},{"id":"https://openalex.org/I4210119392","display_name":"Institute of Microelectronics","ror":"https://ror.org/02s6gs133","country_code":"CN","type":"facility","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210119392"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xiangqu Fu","raw_affiliation_strings":["Key Laboratory of Fabrication Technologies for Integrated Circuits, Institute of Microelectronics of the Chinese Academy of Sciences, Beijing, China"],"raw_orcid":"https://orcid.org/0000-0001-7288-9302","affiliations":[{"raw_affiliation_string":"Key Laboratory of Fabrication Technologies for Integrated Circuits, Institute of Microelectronics of the Chinese Academy of Sciences, Beijing, China","institution_ids":["https://openalex.org/I4210119392","https://openalex.org/I19820366"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5053627404","display_name":"Jinshan Yue","orcid":"https://orcid.org/0000-0001-8234-7400"},"institutions":[{"id":"https://openalex.org/I19820366","display_name":"Chinese Academy of Sciences","ror":"https://ror.org/034t30j35","country_code":"CN","type":"government","lineage":["https://openalex.org/I19820366"]},{"id":"https://openalex.org/I4210119392","display_name":"Institute of Microelectronics","ror":"https://ror.org/02s6gs133","country_code":"CN","type":"facility","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210119392"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jinshan Yue","raw_affiliation_strings":["Key Laboratory of Fabrication Technologies for Integrated Circuits, Institute of Microelectronics of the Chinese Academy of Sciences, Beijing, China"],"raw_orcid":"https://orcid.org/0000-0001-8234-7400","affiliations":[{"raw_affiliation_string":"Key Laboratory of Fabrication Technologies for Integrated Circuits, Institute of Microelectronics of the Chinese Academy of Sciences, Beijing, China","institution_ids":["https://openalex.org/I4210119392","https://openalex.org/I19820366"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101496595","display_name":"Muhammad Faizan","orcid":"https://orcid.org/0000-0001-8672-4269"},"institutions":[{"id":"https://openalex.org/I180726961","display_name":"Shenzhen University","ror":"https://ror.org/01vy4gh70","country_code":"CN","type":"education","lineage":["https://openalex.org/I180726961"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Muhammad Faizan","raw_affiliation_strings":["Key Laboratory of Optoelectronic Devices and System of Ministry of Education and Guangdong Province, College of Physics and Optoelectronic Engineering, Shenzhen University, Shenzhen, China","College of Physics and Optoelectronic Engineering, Key Laboratory of Optoelectronic Devices and System of Ministry of Education and Guangdong Province, Shenzhen University, Shenzhen, China"],"raw_orcid":"https://orcid.org/0000-0001-8672-4269","affiliations":[{"raw_affiliation_string":"Key Laboratory of Optoelectronic Devices and System of Ministry of Education and Guangdong Province, College of Physics and Optoelectronic Engineering, Shenzhen University, Shenzhen, China","institution_ids":["https://openalex.org/I180726961"]},{"raw_affiliation_string":"College of Physics and Optoelectronic Engineering, Key Laboratory of Optoelectronic Devices and System of Ministry of Education and Guangdong Province, Shenzhen University, Shenzhen, China","institution_ids":["https://openalex.org/I180726961"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100373430","display_name":"Zhi Li","orcid":"https://orcid.org/0000-0002-6088-7972"},"institutions":[{"id":"https://openalex.org/I19820366","display_name":"Chinese Academy of Sciences","ror":"https://ror.org/034t30j35","country_code":"CN","type":"government","lineage":["https://openalex.org/I19820366"]},{"id":"https://openalex.org/I4210119392","display_name":"Institute of Microelectronics","ror":"https://ror.org/02s6gs133","country_code":"CN","type":"facility","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210119392"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Zhi Li","raw_affiliation_strings":["Key Laboratory of Fabrication Technologies for Integrated Circuits, Institute of Microelectronics of the Chinese Academy of Sciences, Beijing, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Key Laboratory of Fabrication Technologies for Integrated Circuits, Institute of Microelectronics of the Chinese Academy of Sciences, Beijing, China","institution_ids":["https://openalex.org/I4210119392","https://openalex.org/I19820366"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5102724127","display_name":"Qiang Huo","orcid":"https://orcid.org/0000-0002-9529-9919"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Qiang Huo","raw_affiliation_strings":["Beijing Superstring Academy of Memory Technology, Beijing, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Beijing Superstring Academy of Memory Technology, Beijing, China","institution_ids":[]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5100401317","display_name":"Feng Zhang","orcid":"https://orcid.org/0000-0003-2316-0392"},"institutions":[{"id":"https://openalex.org/I19820366","display_name":"Chinese Academy of Sciences","ror":"https://ror.org/034t30j35","country_code":"CN","type":"government","lineage":["https://openalex.org/I19820366"]},{"id":"https://openalex.org/I4210119392","display_name":"Institute of Microelectronics","ror":"https://ror.org/02s6gs133","country_code":"CN","type":"facility","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210119392"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Feng Zhang","raw_affiliation_strings":["Key Laboratory of Fabrication Technologies for Integrated Circuits, Institute of Microelectronics of the Chinese Academy of Sciences, Beijing, China"],"raw_orcid":"https://orcid.org/0000-0003-2316-0392","affiliations":[{"raw_affiliation_string":"Key Laboratory of Fabrication Technologies for Integrated Circuits, Institute of Microelectronics of the Chinese Academy of Sciences, Beijing, China","institution_ids":["https://openalex.org/I4210119392","https://openalex.org/I19820366"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":6,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.0719878,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":"72","issue":"6","first_page":"2712","last_page":"2725"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11032","display_name":"VLSI and Analog Circuit Testing","score":0.9926999807357788,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11032","display_name":"VLSI and Analog Circuit Testing","score":0.9926999807357788,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10829","display_name":"Interconnection Networks and Systems","score":0.9883000254631042,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12326","display_name":"Network Packet Processing and Optimization","score":0.9873999953269958,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/transformer","display_name":"Transformer","score":0.6659963130950928},{"id":"https://openalex.org/keywords/static-random-access-memory","display_name":"Static random-access memory","score":0.6348679065704346},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.5616689324378967},{"id":"https://openalex.org/keywords/cache","display_name":"Cache","score":0.5582477450370789},{"id":"https://openalex.org/keywords/architecture","display_name":"Architecture","score":0.43166399002075195},{"id":"https://openalex.org/keywords/electronic-engineering","display_name":"Electronic engineering","score":0.42586055397987366},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.4006032645702362},{"id":"https://openalex.org/keywords/embedded-system","display_name":"Embedded system","score":0.3862361013889313},{"id":"https://openalex.org/keywords/electrical-engineering","display_name":"Electrical engineering","score":0.3325961232185364},{"id":"https://openalex.org/keywords/computer-architecture","display_name":"Computer architecture","score":0.32431337237358093},{"id":"https://openalex.org/keywords/computer-hardware","display_name":"Computer hardware","score":0.22865501046180725},{"id":"https://openalex.org/keywords/engineering","display_name":"Engineering","score":0.21428263187408447},{"id":"https://openalex.org/keywords/voltage","display_name":"Voltage","score":0.1350175142288208}],"concepts":[{"id":"https://openalex.org/C66322947","wikidata":"https://www.wikidata.org/wiki/Q11658","display_name":"Transformer","level":3,"score":0.6659963130950928},{"id":"https://openalex.org/C68043766","wikidata":"https://www.wikidata.org/wiki/Q267416","display_name":"Static random-access memory","level":2,"score":0.6348679065704346},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.5616689324378967},{"id":"https://openalex.org/C115537543","wikidata":"https://www.wikidata.org/wiki/Q165596","display_name":"Cache","level":2,"score":0.5582477450370789},{"id":"https://openalex.org/C123657996","wikidata":"https://www.wikidata.org/wiki/Q12271","display_name":"Architecture","level":2,"score":0.43166399002075195},{"id":"https://openalex.org/C24326235","wikidata":"https://www.wikidata.org/wiki/Q126095","display_name":"Electronic engineering","level":1,"score":0.42586055397987366},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.4006032645702362},{"id":"https://openalex.org/C149635348","wikidata":"https://www.wikidata.org/wiki/Q193040","display_name":"Embedded system","level":1,"score":0.3862361013889313},{"id":"https://openalex.org/C119599485","wikidata":"https://www.wikidata.org/wiki/Q43035","display_name":"Electrical engineering","level":1,"score":0.3325961232185364},{"id":"https://openalex.org/C118524514","wikidata":"https://www.wikidata.org/wiki/Q173212","display_name":"Computer architecture","level":1,"score":0.32431337237358093},{"id":"https://openalex.org/C9390403","wikidata":"https://www.wikidata.org/wiki/Q3966","display_name":"Computer hardware","level":1,"score":0.22865501046180725},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.21428263187408447},{"id":"https://openalex.org/C165801399","wikidata":"https://www.wikidata.org/wiki/Q25428","display_name":"Voltage","level":2,"score":0.1350175142288208},{"id":"https://openalex.org/C153349607","wikidata":"https://www.wikidata.org/wiki/Q36649","display_name":"Visual arts","level":1,"score":0.0},{"id":"https://openalex.org/C142362112","wikidata":"https://www.wikidata.org/wiki/Q735","display_name":"Art","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/tcsi.2025.3561245","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tcsi.2025.3561245","pdf_url":null,"source":{"id":"https://openalex.org/S116977442","display_name":"IEEE Transactions on Circuits and Systems I Regular Papers","issn_l":"1549-8328","issn":["1549-8328","1558-0806"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Circuits and Systems I: Regular Papers","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/7","display_name":"Affordable and clean energy","score":0.7599999904632568}],"awards":[{"id":"https://openalex.org/G180543655","display_name":null,"funder_award_id":"2023YFB4402400","funder_id":"https://openalex.org/F4320335777","funder_display_name":"National Key Research and Development Program of China"},{"id":"https://openalex.org/G2076236949","display_name":null,"funder_award_id":"92464201","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G213325492","display_name":null,"funder_award_id":"62204256","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G3722458669","display_name":null,"funder_award_id":"U2341218","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G4906109994","display_name":null,"funder_award_id":"62488101","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G6748911986","display_name":null,"funder_award_id":"92364202","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G6824512897","display_name":null,"funder_award_id":"92464203","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G8661498563","display_name":null,"funder_award_id":"62322412","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"},{"id":"https://openalex.org/F4320335777","display_name":"National Key Research and Development Program of China","ror":null}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":23,"referenced_works":["https://openalex.org/W2346540227","https://openalex.org/W2896457183","https://openalex.org/W3134304371","https://openalex.org/W4214686755","https://openalex.org/W4221001402","https://openalex.org/W4221086307","https://openalex.org/W4312847929","https://openalex.org/W4360606423","https://openalex.org/W4360831786","https://openalex.org/W4360831795","https://openalex.org/W4360832001","https://openalex.org/W4380881077","https://openalex.org/W4385245566","https://openalex.org/W4386108348","https://openalex.org/W4386763770","https://openalex.org/W4386763995","https://openalex.org/W4387042316","https://openalex.org/W6769627184","https://openalex.org/W6778883912","https://openalex.org/W6811013733","https://openalex.org/W6846659131","https://openalex.org/W6849177959","https://openalex.org/W6851592950"],"related_works":["https://openalex.org/W4392590355","https://openalex.org/W3151633427","https://openalex.org/W2212894501","https://openalex.org/W1979375376","https://openalex.org/W2793465010","https://openalex.org/W3024050170","https://openalex.org/W1976168335","https://openalex.org/W2109451123","https://openalex.org/W4378977321","https://openalex.org/W3211992815"],"abstract_inverted_index":{"Multimodal":[0],"Transformer":[1],"(MMT)":[2],"algorithms":[3],"have":[4,83],"become":[5],"the":[6,41,47,53,63,71,78,89,96,102],"state-of-the-art":[7],"for":[8,32,95,174,181],"multimodal":[9,33,134],"tasks":[10],"such":[11],"as":[12],"image":[13],"captioning.":[14],"The":[15],"Encoder-Decoder":[16],"(E-D)":[17],"structure,":[18],"consisting":[19],"of":[20,46,65],"Encoder,":[21,48,90],"Decoder-causal,":[22],"and":[23,29,43,67,74,80,100,113,123,153,163,178,185,222],"Decoder-cross":[24,81],"components,":[25],"provides":[26],"a":[27,92,141,154,167],"flexible":[28],"effective":[30],"framework":[31],"tasks.":[34],"However,":[35],"previous":[36],"accelerators":[37],"mainly":[38],"focus":[39],"on":[40],"dataflow":[42],"hardware":[44],"optimization":[45,69,148],"which":[49],"fails":[50],"to":[51,88,108,131,159,229],"accelerate":[52],"entire":[54],"E-D":[55,75,150],"structure":[56],"efficiently.":[57],"There":[58],"remain":[59],"three":[60,137],"challenges:":[61],"1)":[62,140],"lack":[64],"pipeline":[66,147],"multicore":[68,144,155],"at":[70],"module,":[72],"layer,":[73],"level;":[76],"2)":[77,166],"Decoder-causal":[79,106],"computations":[82],"lower":[84],"arithmetic":[85,98],"intensity":[86],"compared":[87,228],"requiring":[91],"better":[93],"solution":[94],"varying":[97],"intensities;":[99],"3)":[101,186],"autoregressive":[103],"algorithm":[104],"in":[105,193,200],"leads":[107],"redundant":[109,197],"KV":[110,189,201],"Cache":[111,190],"accesses":[112],"considerable":[114],"idle":[115,204],"power.":[116],"In":[117],"this":[118],"paper,":[119],"SHMT,":[120],"an":[121],"SRAM":[122],"HBM":[124],"hybrid":[125],"computing-in-memory":[126],"(CIM)":[127],"architecture,":[128,158,170],"is":[129],"designed":[130],"efficiently":[132],"support":[133],"Transformers":[135],"with":[136,191],"key":[138],"contributions:":[139],"multi-level":[142],"pipelined":[143],"scheme,":[145],"including":[146],"across":[149],"layer-head-module":[151],"levels":[152],"network-on-chip":[156],"(NoC)":[157],"reduce":[160],"inference":[161],"latency":[162],"off-chip":[164],"accesses;":[165],"heterogeneous":[168],"SRAM-HBM":[169],"utilizing":[171],"high-density":[172],"HBM-CIM":[173],"low-arithmetic-intensity":[175],"(LAI)":[176],"parts":[177],"high-performance":[179],"SRAM-CIM":[180],"high-arithmetic-intensity":[182],"(HAI)":[183],"parts;":[184],"by":[187,218],"integrating":[188],"zero-padding":[192],"SRAM-CIM,":[194],"SHMT":[195,211],"eliminates":[196],"read-write":[198],"operations":[199],"Cache,":[202],"reducing":[203],"power":[205],"consumption.":[206],"Experiment":[207],"results":[208],"show":[209],"that":[210],"achieves":[212,223],"212\u00d7":[213],"speedup,":[214],"reduces":[215],"energy":[216,226],"consumption":[217],"208\u00d7~2000\u00d7":[219],"per":[220],"token,":[221],"13.3\u00d7":[224],"higher":[225],"efficiency":[227],"NVIDIA":[230],"A100":[231],"GPU.":[232]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2025-10-10T00:00:00"}
