{"id":"https://openalex.org/W4416963832","doi":"https://doi.org/10.1109/islped65674.2025.11261760","title":"A 20.98TOPS/W Energy-Efficient Binary BERT Model on Group Vector Systolic CIM Accelerator","display_name":"A 20.98TOPS/W Energy-Efficient Binary BERT Model on Group Vector Systolic CIM Accelerator","publication_year":2025,"publication_date":"2025-08-06","ids":{"openalex":"https://openalex.org/W4416963832","doi":"https://doi.org/10.1109/islped65674.2025.11261760"},"language":null,"primary_location":{"id":"doi:10.1109/islped65674.2025.11261760","is_oa":false,"landing_page_url":"https://doi.org/10.1109/islped65674.2025.11261760","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 IEEE/ACM International Symposium on Low Power Electronics and Design (ISLPED)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5067813480","display_name":"Dingbang Liu","orcid":"https://orcid.org/0009-0009-4597-1103"},"institutions":[{"id":"https://openalex.org/I3045169105","display_name":"Southern University of Science and Technology","ror":"https://ror.org/049tv2d57","country_code":"CN","type":"education","lineage":["https://openalex.org/I3045169105"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Dingbang Liu","raw_affiliation_strings":["Southern University of Science and Technology"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Southern University of Science and Technology","institution_ids":["https://openalex.org/I3045169105"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5082090342","display_name":"Ziyi Guan","orcid":"https://orcid.org/0000-0001-9866-6588"},"institutions":[{"id":"https://openalex.org/I889458895","display_name":"University of Hong Kong","ror":"https://ror.org/02zhqgq86","country_code":"HK","type":"education","lineage":["https://openalex.org/I889458895"]}],"countries":["HK"],"is_corresponding":false,"raw_author_name":"Ziyi Guan","raw_affiliation_strings":["The University of Hong Kong"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"The University of Hong Kong","institution_ids":["https://openalex.org/I889458895"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Qilong Chen","orcid":null},"institutions":[{"id":"https://openalex.org/I3045169105","display_name":"Southern University of Science and Technology","ror":"https://ror.org/049tv2d57","country_code":"CN","type":"education","lineage":["https://openalex.org/I3045169105"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Qilong Chen","raw_affiliation_strings":["Southern University of Science and Technology"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Southern University of Science and Technology","institution_ids":["https://openalex.org/I3045169105"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100370048","display_name":"Jun Gu","orcid":"https://orcid.org/0009-0007-4966-7630"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Jingyun Gu","raw_affiliation_strings":["The Hong Kong University of Science and Technology (Guangzhou)"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"The Hong Kong University of Science and Technology (Guangzhou)","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100619615","display_name":"Jiaqi Yang","orcid":"https://orcid.org/0000-0002-2071-2457"},"institutions":[{"id":"https://openalex.org/I3045169105","display_name":"Southern University of Science and Technology","ror":"https://ror.org/049tv2d57","country_code":"CN","type":"education","lineage":["https://openalex.org/I3045169105"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jiaqi Yang","raw_affiliation_strings":["Southern University of Science and Technology"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Southern University of Science and Technology","institution_ids":["https://openalex.org/I3045169105"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5090865912","display_name":"Kai Li","orcid":"https://orcid.org/0000-0003-3251-931X"},"institutions":[{"id":"https://openalex.org/I3045169105","display_name":"Southern University of Science and Technology","ror":"https://ror.org/049tv2d57","country_code":"CN","type":"education","lineage":["https://openalex.org/I3045169105"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Kai Li","raw_affiliation_strings":["Southern University of Science and Technology"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Southern University of Science and Technology","institution_ids":["https://openalex.org/I3045169105"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5062269789","display_name":"Wei Mao","orcid":"https://orcid.org/0000-0003-2527-6778"},"institutions":[{"id":"https://openalex.org/I149594827","display_name":"Xidian University","ror":"https://ror.org/05s92vm98","country_code":"CN","type":"education","lineage":["https://openalex.org/I149594827"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Wei Mao","raw_affiliation_strings":["Xidian University,China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Xidian University,China","institution_ids":["https://openalex.org/I149594827"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5072120057","display_name":"Ngai Wong","orcid":"https://orcid.org/0000-0002-3026-0108"},"institutions":[{"id":"https://openalex.org/I889458895","display_name":"University of Hong Kong","ror":"https://ror.org/02zhqgq86","country_code":"HK","type":"education","lineage":["https://openalex.org/I889458895"]}],"countries":["HK"],"is_corresponding":false,"raw_author_name":"Ngai Wong","raw_affiliation_strings":["The University of Hong Kong"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"The University of Hong Kong","institution_ids":["https://openalex.org/I889458895"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5002277899","display_name":"Chang Wen Chen","orcid":"https://orcid.org/0000-0002-6720-234X"},"institutions":[{"id":"https://openalex.org/I14243506","display_name":"Hong Kong Polytechnic University","ror":"https://ror.org/0030zas98","country_code":"HK","type":"education","lineage":["https://openalex.org/I14243506"]}],"countries":["HK"],"is_corresponding":false,"raw_author_name":"Changwen Chen","raw_affiliation_strings":["The Hong Kong Polytechnic University"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"The Hong Kong Polytechnic University","institution_ids":["https://openalex.org/I14243506"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5034853402","display_name":"Hao Yu","orcid":"https://orcid.org/0000-0002-2674-4118"},"institutions":[{"id":"https://openalex.org/I3045169105","display_name":"Southern University of Science and Technology","ror":"https://ror.org/049tv2d57","country_code":"CN","type":"education","lineage":["https://openalex.org/I3045169105"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Hao Yu","raw_affiliation_strings":["Southern University of Science and Technology"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Southern University of Science and Technology","institution_ids":["https://openalex.org/I3045169105"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":10,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.32923787,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"7"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10502","display_name":"Advanced Memory and Neural Computing","score":0.33059999346733093,"subfield":{"id":"https://openalex.org/subfields/2208","display_name":"Electrical and Electronic Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10502","display_name":"Advanced Memory and Neural Computing","score":0.33059999346733093,"subfield":{"id":"https://openalex.org/subfields/2208","display_name":"Electrical and Electronic Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12808","display_name":"Ferroelectric and Negative Capacitance Devices","score":0.19949999451637268,"subfield":{"id":"https://openalex.org/subfields/2208","display_name":"Electrical and Electronic Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.12389999628067017,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/static-random-access-memory","display_name":"Static random-access memory","score":0.5982999801635742},{"id":"https://openalex.org/keywords/efficient-energy-use","display_name":"Efficient energy use","score":0.5306000113487244},{"id":"https://openalex.org/keywords/binary-number","display_name":"Binary number","score":0.4950999915599823},{"id":"https://openalex.org/keywords/systolic-array","display_name":"Systolic array","score":0.4131999909877777},{"id":"https://openalex.org/keywords/power","display_name":"Power (physics)","score":0.3970000147819519},{"id":"https://openalex.org/keywords/bandwidth","display_name":"Bandwidth (computing)","score":0.38440001010894775},{"id":"https://openalex.org/keywords/ternary-operation","display_name":"Ternary operation","score":0.38260000944137573},{"id":"https://openalex.org/keywords/transformer","display_name":"Transformer","score":0.366100013256073},{"id":"https://openalex.org/keywords/work","display_name":"Work (physics)","score":0.366100013256073}],"concepts":[{"id":"https://openalex.org/C68043766","wikidata":"https://www.wikidata.org/wiki/Q267416","display_name":"Static random-access memory","level":2,"score":0.5982999801635742},{"id":"https://openalex.org/C2742236","wikidata":"https://www.wikidata.org/wiki/Q924713","display_name":"Efficient energy use","level":2,"score":0.5306000113487244},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.49779999256134033},{"id":"https://openalex.org/C48372109","wikidata":"https://www.wikidata.org/wiki/Q3913","display_name":"Binary number","level":2,"score":0.4950999915599823},{"id":"https://openalex.org/C150741067","wikidata":"https://www.wikidata.org/wiki/Q2377218","display_name":"Systolic array","level":3,"score":0.4131999909877777},{"id":"https://openalex.org/C163258240","wikidata":"https://www.wikidata.org/wiki/Q25342","display_name":"Power (physics)","level":2,"score":0.3970000147819519},{"id":"https://openalex.org/C2776257435","wikidata":"https://www.wikidata.org/wiki/Q1576430","display_name":"Bandwidth (computing)","level":2,"score":0.38440001010894775},{"id":"https://openalex.org/C64452783","wikidata":"https://www.wikidata.org/wiki/Q1524945","display_name":"Ternary operation","level":2,"score":0.38260000944137573},{"id":"https://openalex.org/C18762648","wikidata":"https://www.wikidata.org/wiki/Q42213","display_name":"Work (physics)","level":2,"score":0.366100013256073},{"id":"https://openalex.org/C66322947","wikidata":"https://www.wikidata.org/wiki/Q11658","display_name":"Transformer","level":3,"score":0.366100013256073},{"id":"https://openalex.org/C186370098","wikidata":"https://www.wikidata.org/wiki/Q442787","display_name":"Energy (signal processing)","level":2,"score":0.36579999327659607},{"id":"https://openalex.org/C157764524","wikidata":"https://www.wikidata.org/wiki/Q1383412","display_name":"Throughput","level":3,"score":0.36230000853538513},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.36010000109672546},{"id":"https://openalex.org/C166955791","wikidata":"https://www.wikidata.org/wiki/Q629579","display_name":"Macro","level":2,"score":0.3521000146865845},{"id":"https://openalex.org/C9390403","wikidata":"https://www.wikidata.org/wiki/Q3966","display_name":"Computer hardware","level":1,"score":0.3336000144481659},{"id":"https://openalex.org/C79403827","wikidata":"https://www.wikidata.org/wiki/Q3988","display_name":"Real-time computing","level":1,"score":0.3172999918460846},{"id":"https://openalex.org/C1232282","wikidata":"https://www.wikidata.org/wiki/Q1783551","display_name":"Data conversion","level":2,"score":0.30059999227523804},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.29440000653266907},{"id":"https://openalex.org/C2780551164","wikidata":"https://www.wikidata.org/wiki/Q2306599","display_name":"Column (typography)","level":3,"score":0.28690001368522644},{"id":"https://openalex.org/C24326235","wikidata":"https://www.wikidata.org/wiki/Q126095","display_name":"Electronic engineering","level":1,"score":0.2721000015735626},{"id":"https://openalex.org/C162319229","wikidata":"https://www.wikidata.org/wiki/Q175263","display_name":"Data structure","level":2,"score":0.2687000036239624},{"id":"https://openalex.org/C2781311116","wikidata":"https://www.wikidata.org/wiki/Q83306","display_name":"Group (periodic table)","level":2,"score":0.26840001344680786},{"id":"https://openalex.org/C45737032","wikidata":"https://www.wikidata.org/wiki/Q748364","display_name":"S-box","level":4,"score":0.26660001277923584},{"id":"https://openalex.org/C118993495","wikidata":"https://www.wikidata.org/wiki/Q5042828","display_name":"Electrical efficiency","level":3,"score":0.2619999945163727},{"id":"https://openalex.org/C3017489831","wikidata":"https://www.wikidata.org/wiki/Q2393193","display_name":"Running time","level":2,"score":0.26109999418258667},{"id":"https://openalex.org/C162307627","wikidata":"https://www.wikidata.org/wiki/Q204833","display_name":"Enhanced Data Rates for GSM Evolution","level":2,"score":0.2596000134944916},{"id":"https://openalex.org/C2780443747","wikidata":"https://www.wikidata.org/wiki/Q3516922","display_name":"Group technology","level":2,"score":0.2587999999523163},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.2515000104904175}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/islped65674.2025.11261760","is_oa":false,"landing_page_url":"https://doi.org/10.1109/islped65674.2025.11261760","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 IEEE/ACM International Symposium on Low Power Electronics and Design (ISLPED)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":18,"referenced_works":["https://openalex.org/W2518281301","https://openalex.org/W2588191434","https://openalex.org/W3134526034","https://openalex.org/W3173374050","https://openalex.org/W4221086307","https://openalex.org/W4285250061","https://openalex.org/W4286571858","https://openalex.org/W4312196615","https://openalex.org/W4313332258","https://openalex.org/W4386763737","https://openalex.org/W4387789586","https://openalex.org/W4392181806","https://openalex.org/W4392775632","https://openalex.org/W4393407021","https://openalex.org/W4399146344","https://openalex.org/W4401880123","https://openalex.org/W4404134110","https://openalex.org/W4404134161"],"related_works":[],"abstract_inverted_index":{"Transformer-based":[0],"large":[1],"language":[2],"models":[3],"(LLMs)":[4],"impose":[5],"significant":[6],"bandwidth":[7],"and":[8,48,125,132,146,152,156,169],"compute":[9],"challenges":[10],"when":[11],"deployed":[12],"on":[13,159,197],"edge":[14],"devices.":[15],"SRAM-based":[16,71],"compute-in-memory":[17],"(CIM)":[18],"accelerators":[19],"offer":[20],"a":[21,38,68,77,82,105,121,127,138,163,170,193],"promising":[22],"solution":[23],"to":[24,44,63,177],"reduce":[25],"data":[26,100],"movement":[27],"but":[28],"are":[29],"still":[30],"limited":[31],"by":[32,189],"model":[33,60,96,187],"size.":[34],"This":[35],"work":[36],"develops":[37],"ternary":[39],"weight":[40],"splitting":[41],"(TWS)":[42],"binarization":[43],"obtain":[45],"Brain-Floating-Point-16\u00d7INT1":[46],"(BF16\u00d71-b)":[47],"INT8\u00d7INT1":[49],"(8-b\u00d71-b)":[50],"based":[51],"transformers":[52],"that":[53],"exhibit":[54],"competitive":[55],"accuracy":[56,195],"while":[57],"significantly":[58],"reducing":[59],"size":[61,188],"compared":[62,176],"full":[64],"precision":[65],"counterparts.":[66,180],"Then,":[67],"fully":[69],"digital":[70],"CIM":[72],"accelerator":[73],"is":[74],"designed":[75],"incorporating":[76],"bit-parallel":[78],"SRAM":[79,111],"macro":[80],"within":[81],"highly":[83],"efficient":[84],"group":[85],"vector":[86],"systolic":[87,99],"architecture,":[88],"which":[89],"can":[90],"store":[91],"one":[92],"column":[93],"of":[94,115,123,130,142,150],"BERT-Tiny":[95,160],"with":[97,112,191],"stationary":[98],"reuse.":[101],"The":[102],"design":[103],"in":[104,137,166,173],"28nm":[106],"technology":[107],"only":[108,192],"requires":[109],"2KB":[110],"an":[113],"area":[114,140,167],"2mm<sup":[116],"xmlns:mml=\"http://www.w3.org/1998/Math/MathML\"":[117,144],"xmlns:xlink=\"http://www.w3.org/1999/xlink\">2</sup>.":[118],"It":[119],"achieves":[120],"throughput":[122],"6.55TOPS":[124],"consumes":[126],"total":[128],"power":[129],"312.5mW":[131],"221mW":[133],"at":[134],"400MHz,":[135],"resulting":[136],"state-of-the-art":[139,179],"efficiency":[141,149,168,175],"3.3TOPS/mm<sup":[143],"xmlns:xlink=\"http://www.w3.org/1999/xlink\">2</sup>":[145],"normalized":[147],"energy":[148,174],"20.98TOPS/W":[151],"34.35TOPS/W":[153],"for":[154],"BF16\u00d71-b":[155],"8-b\u00d71-b":[157],"respectively":[158],"model,":[161],"demonstrating":[162],"10.25\u00d7":[164],"improvement":[165,172],"2.23\u00d7":[171],"other":[178],"Additionally,":[181],"our":[182],"proposed":[183],"configuration":[184],"compresses":[185],"the":[186],"32%":[190],"0.5%":[194],"loss":[196],"SST-2.":[198]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2025-12-03T00:00:00"}
