{"id":"https://openalex.org/W4401069915","doi":"https://doi.org/10.1109/tvlsi.2024.3432403","title":"An Efficient Two-Stage Pipelined Compute-in-Memory Macro for Accelerating Transformer Feed-Forward Networks","display_name":"An Efficient Two-Stage Pipelined Compute-in-Memory Macro for Accelerating Transformer Feed-Forward Networks","publication_year":2024,"publication_date":"2024-07-29","ids":{"openalex":"https://openalex.org/W4401069915","doi":"https://doi.org/10.1109/tvlsi.2024.3432403"},"language":"en","primary_location":{"id":"doi:10.1109/tvlsi.2024.3432403","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tvlsi.2024.3432403","pdf_url":null,"source":{"id":"https://openalex.org/S37538908","display_name":"IEEE Transactions on Very Large Scale Integration (VLSI) Systems","issn_l":"1063-8210","issn":["1063-8210","1557-9999"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Very Large Scale Integration (VLSI) Systems","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5078909424","display_name":"Heng Zhang","orcid":"https://orcid.org/0009-0001-7827-7849"},"institutions":[{"id":"https://openalex.org/I881766915","display_name":"Nanjing University","ror":"https://ror.org/01rxvg760","country_code":"CN","type":"education","lineage":["https://openalex.org/I881766915"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Heng Zhang","raw_affiliation_strings":["School of Electronic Science and Engineering, Nanjing University, Nanjing, China"],"raw_orcid":"https://orcid.org/0009-0001-7827-7849","affiliations":[{"raw_affiliation_string":"School of Electronic Science and Engineering, Nanjing University, Nanjing, China","institution_ids":["https://openalex.org/I881766915"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5023155958","display_name":"Wenhe Yin","orcid":"https://orcid.org/0009-0001-0664-6035"},"institutions":[{"id":"https://openalex.org/I881766915","display_name":"Nanjing University","ror":"https://ror.org/01rxvg760","country_code":"CN","type":"education","lineage":["https://openalex.org/I881766915"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Wenhe Yin","raw_affiliation_strings":["School of Electronic Science and Engineering, Nanjing University, Nanjing, China"],"raw_orcid":"https://orcid.org/0009-0001-0664-6035","affiliations":[{"raw_affiliation_string":"School of Electronic Science and Engineering, Nanjing University, Nanjing, China","institution_ids":["https://openalex.org/I881766915"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5073567200","display_name":"Sunan He","orcid":"https://orcid.org/0009-0004-4604-9587"},"institutions":[{"id":"https://openalex.org/I881766915","display_name":"Nanjing University","ror":"https://ror.org/01rxvg760","country_code":"CN","type":"education","lineage":["https://openalex.org/I881766915"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Sunan He","raw_affiliation_strings":["School of Electronic Science and Engineering, Nanjing University, Nanjing, China"],"raw_orcid":"https://orcid.org/0009-0004-4604-9587","affiliations":[{"raw_affiliation_string":"School of Electronic Science and Engineering, Nanjing University, Nanjing, China","institution_ids":["https://openalex.org/I881766915"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5084816122","display_name":"Yuan Du","orcid":"https://orcid.org/0000-0002-5316-619X"},"institutions":[{"id":"https://openalex.org/I881766915","display_name":"Nanjing University","ror":"https://ror.org/01rxvg760","country_code":"CN","type":"education","lineage":["https://openalex.org/I881766915"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yuan Du","raw_affiliation_strings":["School of Electronic Science and Engineering, Nanjing University, Nanjing, China"],"raw_orcid":"https://orcid.org/0000-0002-5316-619X","affiliations":[{"raw_affiliation_string":"School of Electronic Science and Engineering, Nanjing University, Nanjing, China","institution_ids":["https://openalex.org/I881766915"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5039665206","display_name":"Li Du","orcid":"https://orcid.org/0000-0003-2687-6978"},"institutions":[{"id":"https://openalex.org/I881766915","display_name":"Nanjing University","ror":"https://ror.org/01rxvg760","country_code":"CN","type":"education","lineage":["https://openalex.org/I881766915"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Li Du","raw_affiliation_strings":["School of Electronic Science and Engineering, Nanjing University, Nanjing, China"],"raw_orcid":"https://orcid.org/0000-0003-2687-6978","affiliations":[{"raw_affiliation_string":"School of Electronic Science and Engineering, Nanjing University, Nanjing, China","institution_ids":["https://openalex.org/I881766915"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5078909424"],"corresponding_institution_ids":["https://openalex.org/I881766915"],"apc_list":null,"apc_paid":null,"fwci":0.6009,"has_fulltext":false,"cited_by_count":3,"citation_normalized_percentile":{"value":0.67052955,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":96,"max":97},"biblio":{"volume":"32","issue":"10","first_page":"1889","last_page":"1899"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T13183","display_name":"Islanding Detection in Power Systems","score":0.9933000206947327,"subfield":{"id":"https://openalex.org/subfields/2208","display_name":"Electrical and Electronic Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T13183","display_name":"Islanding Detection in Power Systems","score":0.9933000206947327,"subfield":{"id":"https://openalex.org/subfields/2208","display_name":"Electrical and Electronic Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10232","display_name":"Optical Network Technologies","score":0.9793999791145325,"subfield":{"id":"https://openalex.org/subfields/2208","display_name":"Electrical and Electronic Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10363","display_name":"Low-power high-performance VLSI design","score":0.9764000177383423,"subfield":{"id":"https://openalex.org/subfields/2208","display_name":"Electrical and Electronic Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/macro","display_name":"Macro","score":0.6947160363197327},{"id":"https://openalex.org/keywords/transformer","display_name":"Transformer","score":0.6219946146011353},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.5692667365074158},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.5571134090423584},{"id":"https://openalex.org/keywords/arithmetic","display_name":"Arithmetic","score":0.3288225829601288},{"id":"https://openalex.org/keywords/electrical-engineering","display_name":"Electrical engineering","score":0.2451489269733429},{"id":"https://openalex.org/keywords/voltage","display_name":"Voltage","score":0.21897634863853455},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.1472732424736023},{"id":"https://openalex.org/keywords/engineering","display_name":"Engineering","score":0.12846684455871582},{"id":"https://openalex.org/keywords/programming-language","display_name":"Programming language","score":0.07594040036201477}],"concepts":[{"id":"https://openalex.org/C166955791","wikidata":"https://www.wikidata.org/wiki/Q629579","display_name":"Macro","level":2,"score":0.6947160363197327},{"id":"https://openalex.org/C66322947","wikidata":"https://www.wikidata.org/wiki/Q11658","display_name":"Transformer","level":3,"score":0.6219946146011353},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.5692667365074158},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.5571134090423584},{"id":"https://openalex.org/C94375191","wikidata":"https://www.wikidata.org/wiki/Q11205","display_name":"Arithmetic","level":1,"score":0.3288225829601288},{"id":"https://openalex.org/C119599485","wikidata":"https://www.wikidata.org/wiki/Q43035","display_name":"Electrical engineering","level":1,"score":0.2451489269733429},{"id":"https://openalex.org/C165801399","wikidata":"https://www.wikidata.org/wiki/Q25428","display_name":"Voltage","level":2,"score":0.21897634863853455},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.1472732424736023},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.12846684455871582},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.07594040036201477}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/tvlsi.2024.3432403","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tvlsi.2024.3432403","pdf_url":null,"source":{"id":"https://openalex.org/S37538908","display_name":"IEEE Transactions on Very Large Scale Integration (VLSI) Systems","issn_l":"1063-8210","issn":["1063-8210","1557-9999"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Very Large Scale Integration (VLSI) Systems","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[{"display_name":"Affordable and clean energy","score":0.8100000023841858,"id":"https://metadata.un.org/sdg/7"}],"awards":[{"id":"https://openalex.org/G5559412533","display_name":null,"funder_award_id":"62371223","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":51,"referenced_works":["https://openalex.org/W2809624076","https://openalex.org/W2896457183","https://openalex.org/W2923014074","https://openalex.org/W2963082137","https://openalex.org/W2975429091","https://openalex.org/W3017024317","https://openalex.org/W3088773763","https://openalex.org/W3114885184","https://openalex.org/W3135906938","https://openalex.org/W3138828421","https://openalex.org/W3139521791","https://openalex.org/W3161873870","https://openalex.org/W3189877953","https://openalex.org/W3202028501","https://openalex.org/W3208788005","https://openalex.org/W4287118909","https://openalex.org/W4288089799","https://openalex.org/W4301581299","https://openalex.org/W4309591680","https://openalex.org/W4312847929","https://openalex.org/W4313069943","https://openalex.org/W4313639541","https://openalex.org/W4319866755","https://openalex.org/W4322718253","https://openalex.org/W4362598949","https://openalex.org/W4366341968","https://openalex.org/W4367146866","https://openalex.org/W4381713178","https://openalex.org/W4381827750","https://openalex.org/W4385245566","https://openalex.org/W4385732107","https://openalex.org/W4386736489","https://openalex.org/W4387042316","https://openalex.org/W4389799305","https://openalex.org/W4390017976","https://openalex.org/W4390871466","https://openalex.org/W6753069482","https://openalex.org/W6755207826","https://openalex.org/W6758046424","https://openalex.org/W6767719158","https://openalex.org/W6769627184","https://openalex.org/W6773820404","https://openalex.org/W6787517681","https://openalex.org/W6796707148","https://openalex.org/W6796815506","https://openalex.org/W6847478871","https://openalex.org/W6848185429","https://openalex.org/W6850162387","https://openalex.org/W6851828392","https://openalex.org/W6853093965","https://openalex.org/W6853667026"],"related_works":["https://openalex.org/W4391375266","https://openalex.org/W2748952813","https://openalex.org/W2030816003","https://openalex.org/W4239992647","https://openalex.org/W2150013480","https://openalex.org/W1554458299","https://openalex.org/W2076325756","https://openalex.org/W81423522","https://openalex.org/W1509860481","https://openalex.org/W2488264085"],"abstract_inverted_index":{"Transformer":[0],"architectures":[1],"have":[2],"achieved":[3],"state-of-the-art":[4],"performance":[5],"in":[6,67],"various":[7],"applications.":[8],"However,":[9],"deploying":[10,43],"transformer":[11,52,169],"models":[12,170],"on":[13],"resource-constrained":[14],"platforms":[15],"is":[16,146],"still":[17],"challenging":[18],"due":[19],"to":[20,60,81,100,107],"its":[21],"dynamic":[22],"workloads,":[23],"intensive":[24],"computations,":[25],"and":[26,44,105,126,139,152,162,188],"substantial":[27],"memory":[28],"access.":[29],"In":[30],"this":[31],"article,":[32],"we":[33,175],"propose":[34],"a":[35],"two-stage":[36],"pipelined":[37],"compute-in-memory":[38],"(CIM)":[39],"macro":[40,137,145],"for":[41,112,122,130,186,191],"effectively":[42],"accelerating":[45],"the":[46,62,83,93,101,108,120,172,177],"feed-forward":[47],"network":[48],"(FFN)":[49],"layers":[50,179],"of":[51,180],"models.":[53],"Two":[54],"independent":[55],"CIM":[56,95,110],"arrays":[57],"are":[58,71,97],"designed":[59],"execute":[61],"two":[63],"distinct":[64],"linear":[65,77,115],"projections":[66],"FFN":[68,178],"layers,":[69],"which":[70,197],"interconnected":[72],"by":[73],"co-designed":[74],"analog":[75,88,102],"rectified":[76],"unit":[78],"(ReLU)":[79],"circuits":[80],"realize":[82],"nonlinear":[84],"activation":[85],"function.":[86],"The":[87],"multiply-and-add":[89],"(MAC)":[90],"results":[91],"from":[92],"first":[94],"array":[96,111],"streamed":[98],"directly":[99],"ReLU":[103],"circuits,":[104],"subsequently":[106],"next":[109],"performing":[113],"another":[114],"projection.":[116],"This":[117],"architecture":[118],"eliminates":[119],"need":[121],"analog-to-digital":[123],"converters":[124,128],"(ADCs)":[125],"digital-to-analog":[127],"(DACs)":[129],"internal":[131],"results\u2019":[132],"staging,":[133],"thereby":[134],"enhancing":[135],"overall":[136],"efficiency":[138],"reducing":[140],"computing":[141],"latency.":[142],"A":[143],"proof-of-concept":[144],"fabricated":[147],"using":[148,193],"TSMC":[149],"65-nm":[150],"process":[151],"achieves":[153],"4.096":[154],"TOPS":[155],"peak":[156],"throughput,":[157],"4.39":[158],"TOPS/mm2":[159],"area":[160],"efficiency,":[161],"49.83":[163],"TOPS/W":[164],"energy":[165],"efficiency.":[166],"To":[167],"map":[168],"onto":[171],"proposed":[173],"macro,":[174],"quantize":[176],"BERTMINI":[181],"model":[182],"under":[183],"per-token":[184],"granularity":[185,190],"activations":[187],"per-tensor":[189],"weights":[192],"quantization-aware":[194],"training":[195],"(QAT),":[196],"exhibits":[198],"excellent":[199],"accuracy":[200],"across":[201],"multiple":[202],"benchmarks.":[203]},"counts_by_year":[{"year":2025,"cited_by_count":3}],"updated_date":"2026-03-27T05:58:40.876381","created_date":"2025-10-10T00:00:00"}
