{"id":"https://openalex.org/W4408183246","doi":"https://doi.org/10.1109/isscc49661.2025.10904774","title":"23.8 An 88.36TOPS/W Bit-Level-Weight-Compressed Large-Language-Model Accelerator with Cluster-Aligned INT-FP-GEMM and Bi-Dimensional Workflow Reformulation","display_name":"23.8 An 88.36TOPS/W Bit-Level-Weight-Compressed Large-Language-Model Accelerator with Cluster-Aligned INT-FP-GEMM and Bi-Dimensional Workflow Reformulation","publication_year":2025,"publication_date":"2025-02-16","ids":{"openalex":"https://openalex.org/W4408183246","doi":"https://doi.org/10.1109/isscc49661.2025.10904774"},"language":"en","primary_location":{"id":"doi:10.1109/isscc49661.2025.10904774","is_oa":false,"landing_page_url":"https://doi.org/10.1109/isscc49661.2025.10904774","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 IEEE International Solid-State Circuits Conference (ISSCC)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5071432313","display_name":"Yubin Qin","orcid":"https://orcid.org/0000-0001-5530-5416"},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Yubin Qin","raw_affiliation_strings":["Tsinghua University,Beijing,China"],"affiliations":[{"raw_affiliation_string":"Tsinghua University,Beijing,China","institution_ids":["https://openalex.org/I99065089"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100322887","display_name":"Yang Wang","orcid":"https://orcid.org/0000-0003-1029-9280"},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yang Wang","raw_affiliation_strings":["Tsinghua University,Beijing,China"],"affiliations":[{"raw_affiliation_string":"Tsinghua University,Beijing,China","institution_ids":["https://openalex.org/I99065089"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5028744089","display_name":"Jiachen Wang","orcid":"https://orcid.org/0009-0001-1857-4024"},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jiachen Wang","raw_affiliation_strings":["Tsinghua University,Beijing,China"],"affiliations":[{"raw_affiliation_string":"Tsinghua University,Beijing,China","institution_ids":["https://openalex.org/I99065089"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100312517","display_name":"Lin Zhiwei","orcid":null},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Zhiwei Lin","raw_affiliation_strings":["Tsinghua University,Beijing,China"],"affiliations":[{"raw_affiliation_string":"Tsinghua University,Beijing,China","institution_ids":["https://openalex.org/I99065089"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101913213","display_name":"Yu Zhao","orcid":"https://orcid.org/0000-0003-1479-385X"},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yushu Zhao","raw_affiliation_strings":["Tsinghua University,Beijing,China"],"affiliations":[{"raw_affiliation_string":"Tsinghua University,Beijing,China","institution_ids":["https://openalex.org/I99065089"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5080635076","display_name":"Shaojun Wei","orcid":null},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Shaojun Wei","raw_affiliation_strings":["Tsinghua University,Beijing,China"],"affiliations":[{"raw_affiliation_string":"Tsinghua University,Beijing,China","institution_ids":["https://openalex.org/I99065089"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5089921601","display_name":"Yang Hu","orcid":"https://orcid.org/0000-0003-0379-1525"},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yang Hu","raw_affiliation_strings":["Tsinghua University,Beijing,China"],"affiliations":[{"raw_affiliation_string":"Tsinghua University,Beijing,China","institution_ids":["https://openalex.org/I99065089"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5054524841","display_name":"Shouyi Yin","orcid":"https://orcid.org/0000-0003-2309-572X"},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Shouyi Yin","raw_affiliation_strings":["Tsinghua University,Beijing,China"],"affiliations":[{"raw_affiliation_string":"Tsinghua University,Beijing,China","institution_ids":["https://openalex.org/I99065089"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":8,"corresponding_author_ids":["https://openalex.org/A5071432313"],"corresponding_institution_ids":["https://openalex.org/I99065089"],"apc_list":null,"apc_paid":null,"fwci":26.8864,"has_fulltext":false,"cited_by_count":11,"citation_normalized_percentile":{"value":0.99672408,"is_in_top_1_percent":true,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":96,"max":100},"biblio":{"volume":null,"issue":null,"first_page":"420","last_page":"422"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.9391000270843506,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.9391000270843506,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10901","display_name":"Advanced Data Compression Techniques","score":0.9280999898910522,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6598646640777588},{"id":"https://openalex.org/keywords/workflow","display_name":"Workflow","score":0.6013606190681458},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.5943804383277893},{"id":"https://openalex.org/keywords/computational-science","display_name":"Computational science","score":0.5665359497070312},{"id":"https://openalex.org/keywords/cluster","display_name":"Cluster (spacecraft)","score":0.4799545109272003},{"id":"https://openalex.org/keywords/programming-language","display_name":"Programming language","score":0.4633041024208069},{"id":"https://openalex.org/keywords/computer-architecture","display_name":"Computer architecture","score":0.41788265109062195},{"id":"https://openalex.org/keywords/computer-graphics","display_name":"Computer graphics (images)","score":0.3555128574371338},{"id":"https://openalex.org/keywords/database","display_name":"Database","score":0.17043951153755188}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6598646640777588},{"id":"https://openalex.org/C177212765","wikidata":"https://www.wikidata.org/wiki/Q627335","display_name":"Workflow","level":2,"score":0.6013606190681458},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.5943804383277893},{"id":"https://openalex.org/C459310","wikidata":"https://www.wikidata.org/wiki/Q117801","display_name":"Computational science","level":1,"score":0.5665359497070312},{"id":"https://openalex.org/C164866538","wikidata":"https://www.wikidata.org/wiki/Q367351","display_name":"Cluster (spacecraft)","level":2,"score":0.4799545109272003},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.4633041024208069},{"id":"https://openalex.org/C118524514","wikidata":"https://www.wikidata.org/wiki/Q173212","display_name":"Computer architecture","level":1,"score":0.41788265109062195},{"id":"https://openalex.org/C121684516","wikidata":"https://www.wikidata.org/wiki/Q7600677","display_name":"Computer graphics (images)","level":1,"score":0.3555128574371338},{"id":"https://openalex.org/C77088390","wikidata":"https://www.wikidata.org/wiki/Q8513","display_name":"Database","level":1,"score":0.17043951153755188}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/isscc49661.2025.10904774","is_oa":false,"landing_page_url":"https://doi.org/10.1109/isscc49661.2025.10904774","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 IEEE International Solid-State Circuits Conference (ISSCC)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G4967090155","display_name":null,"funder_award_id":"2022ZD0115201","funder_id":"https://openalex.org/F4320329860","funder_display_name":"National Science and Technology Major Project"},{"id":"https://openalex.org/G8978128402","display_name":null,"funder_award_id":"62125403,92164301,62304121","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"},{"id":"https://openalex.org/F4320329777","display_name":"Beijing National Research Center For Information Science And Technology","ror":null},{"id":"https://openalex.org/F4320329860","display_name":"National Science and Technology Major Project","ror":null}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":21,"referenced_works":["https://openalex.org/W2963341956","https://openalex.org/W4220865834","https://openalex.org/W4286571858","https://openalex.org/W4312847929","https://openalex.org/W4360605545","https://openalex.org/W4360606782","https://openalex.org/W4380881077","https://openalex.org/W4385216221","https://openalex.org/W4392679398","https://openalex.org/W4392739437","https://openalex.org/W4396909834","https://openalex.org/W4401211806","https://openalex.org/W4401881162","https://openalex.org/W4406650295","https://openalex.org/W6766673545","https://openalex.org/W6772383348","https://openalex.org/W6811340617","https://openalex.org/W6846164622","https://openalex.org/W6852927819","https://openalex.org/W6853187832","https://openalex.org/W6854866820"],"related_works":["https://openalex.org/W1981780420","https://openalex.org/W2182707996","https://openalex.org/W45233828","https://openalex.org/W2964988449","https://openalex.org/W2397952901","https://openalex.org/W2029380707","https://openalex.org/W188202134","https://openalex.org/W4255934811","https://openalex.org/W2465382974","https://openalex.org/W2010229520"],"abstract_inverted_index":{"Large":[0],"language":[1,14],"models":[2],"(LLMs)":[3],"have":[4],"shown":[5,56],"remarkable":[6],"performance":[7,28],"across":[8],"a":[9,108,127,137,209,219],"wide":[10],"range":[11],"of":[12,22,50,249],"natural":[13],"processing":[15,220],"(NLP)":[16],"tasks,":[17],"becoming":[18],"an":[19,61,119,133],"essential":[20],"part":[21],"modern":[23],"society":[24],"[1]\u2013[4].":[25],"This":[26,176],"exceptional":[27],"can":[29,151],"be":[30,152,160],"attributed":[31],"to":[32,70,126,147,154,170,174,237,282],"huge":[33],"model":[34,96],"size":[35],"and":[36,98,184,202,228,263,266],"autoregressive":[37,72],"computation":[38],"[5],":[39],"[6].":[40],"However,":[41],"these":[42],"attributes":[43],"pose":[44],"challenges":[45],"for":[46,103,115,231,247,260],"the":[47,85,92,99,113,157,225,232,250],"efficient":[48,257],"deployment":[49,259],"LLMs":[51,193],"from":[52,191],"3":[53],"aspects,":[54],"as":[55,199,216],"in":[57,162,243],"Fig.":[58],"23.8.1.":[59],"First,":[60],"LLM":[62,120,134,258],"has":[63],"enormous":[64],"external":[65],"memory":[66],"access":[67],"(EMA)":[68],"due":[69],"its":[71],"feature.":[73],"During":[74],"inference,":[75],"it":[76],"generates":[77],"output":[78],"tokens":[79],"one":[80,82],"by":[81,208,218,270],"until":[83],"all":[84,95],"outputs":[86],"are":[87,205,267,280],"complete.":[88],"In":[89],"each":[90],"iteration,":[91],"processor":[93],"reads":[94],"weights":[97,150,169],"key-value":[100],"(KV)":[101],"cache":[102,114],"computation,":[104],"while":[105],"also":[106],"storing":[107],"newly":[109],"generated":[110],"KV":[111],"into":[112],"further":[116],"use.":[117],"Hence,":[118],"requires":[121],"over":[122],"1000x":[123],"EMA":[124],"compared":[125],"non-autoregressive":[128],"Transformer":[129],"[7],":[130],"[8].":[131],"Second,":[132],"may":[135],"use":[136,195],"unique":[138],"integer-float":[139],"(INT-FP)":[140],"mixed-precision":[141],"general":[142],"matrix":[143],"multiplication":[144],"(MP-GEMM).":[145],"Due":[146],"accuracy":[148],"concerns,":[149],"quantized":[153],"INT,":[155],"but":[156],"activations":[158],"should":[159],"retained":[161],"FP":[163],"[9]\u2013[11].":[164],"Generally,":[165],"processors":[166,235],"dequantize":[167],"INT":[168],"FP,":[171],"turning":[172],"MP-GEMM":[173],"FP-GEMM.":[175],"approach":[177],"does":[178],"not":[179],"fully":[180],"exploit":[181],"quantization":[182],"benefits":[183],"consumes":[185],"4.6x":[186],"more":[187],"power.":[188],"Third,":[189],"apart":[190],"GEMM,":[192,215],"frequently":[194],"non-linear":[196,226],"functions,":[197],"such":[198],"trigonometric":[200],"functions":[201],"softmax,":[203],"which":[204,273],"commonly":[206],"computed":[207],"special":[210],"function":[211],"unit":[212],"(SFU).":[213],"The":[214],"performed":[217],"element":[221],"(PE),":[222],"depends":[223],"on":[224,275],"results":[227],"thus":[229],"waits":[230],"SFU.":[233],"Current":[234],"fail":[236],"provide":[238],"sufficient":[239],"SFU":[240],"throughput,":[241],"resulting":[242],"PEs":[244],"left":[245],"waiting":[246],"1/3":[248],"time":[251],"[12],":[252],"[13].":[253],"These":[254],"problems":[255],"hinder":[256],"both":[261],"cloud":[262],"edge":[264],"devices,":[265],"largely":[268],"unexplored":[269],"existing":[271],"accelerators":[272],"focus":[274],"traditional":[276],"Transformers":[277],"[13]\u2013[22]":[278],"or":[279],"limited":[281],"specific":[283],"algorithms":[284],"[23],":[285],"[24].":[286]},"counts_by_year":[{"year":2026,"cited_by_count":8},{"year":2025,"cited_by_count":3}],"updated_date":"2026-03-12T08:34:05.389933","created_date":"2025-10-10T00:00:00"}
