{"id":"https://openalex.org/W4409248731","doi":"https://doi.org/10.1109/hpca61900.2025.00058","title":"Exploring the Performance Improvement of Tensor Processing Engines through Transformation in the Bit-weight Dimension of MACs","display_name":"Exploring the Performance Improvement of Tensor Processing Engines through Transformation in the Bit-weight Dimension of MACs","publication_year":2025,"publication_date":"2025-03-01","ids":{"openalex":"https://openalex.org/W4409248731","doi":"https://doi.org/10.1109/hpca61900.2025.00058"},"language":"en","primary_location":{"id":"doi:10.1109/hpca61900.2025.00058","is_oa":false,"landing_page_url":"https://doi.org/10.1109/hpca61900.2025.00058","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 IEEE International Symposium on High Performance Computer Architecture (HPCA)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5000968348","display_name":"Qizhe Wu","orcid":"https://orcid.org/0000-0002-4977-5363"},"institutions":[{"id":"https://openalex.org/I126520041","display_name":"University of Science and Technology of China","ror":"https://ror.org/04c4dkn09","country_code":"CN","type":"education","lineage":["https://openalex.org/I126520041","https://openalex.org/I19820366"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Qizhe Wu","raw_affiliation_strings":["University of Science and Technology of China,Department of Physics"],"affiliations":[{"raw_affiliation_string":"University of Science and Technology of China,Department of Physics","institution_ids":["https://openalex.org/I126520041"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5051492842","display_name":"Huawen Liang","orcid":"https://orcid.org/0000-0003-3196-2942"},"institutions":[{"id":"https://openalex.org/I126520041","display_name":"University of Science and Technology of China","ror":"https://ror.org/04c4dkn09","country_code":"CN","type":"education","lineage":["https://openalex.org/I126520041","https://openalex.org/I19820366"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Huawen Liang","raw_affiliation_strings":["University of Science and Technology of China,Department of Physics"],"affiliations":[{"raw_affiliation_string":"University of Science and Technology of China,Department of Physics","institution_ids":["https://openalex.org/I126520041"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5079212176","display_name":"Yuchen Gui","orcid":"https://orcid.org/0009-0004-3331-6949"},"institutions":[{"id":"https://openalex.org/I126520041","display_name":"University of Science and Technology of China","ror":"https://ror.org/04c4dkn09","country_code":"CN","type":"education","lineage":["https://openalex.org/I126520041","https://openalex.org/I19820366"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yuchen Gui","raw_affiliation_strings":["University of Science and Technology of China,Department of Physics"],"affiliations":[{"raw_affiliation_string":"University of Science and Technology of China,Department of Physics","institution_ids":["https://openalex.org/I126520041"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5104284191","display_name":"Zhichen Zeng","orcid":null},"institutions":[{"id":"https://openalex.org/I126520041","display_name":"University of Science and Technology of China","ror":"https://ror.org/04c4dkn09","country_code":"CN","type":"education","lineage":["https://openalex.org/I126520041","https://openalex.org/I19820366"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Zhichen Zeng","raw_affiliation_strings":["University of Science and Technology of China,Department of Physics"],"affiliations":[{"raw_affiliation_string":"University of Science and Technology of China,Department of Physics","institution_ids":["https://openalex.org/I126520041"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5103234348","display_name":"Zerong He","orcid":"https://orcid.org/0000-0003-2063-170X"},"institutions":[{"id":"https://openalex.org/I126520041","display_name":"University of Science and Technology of China","ror":"https://ror.org/04c4dkn09","country_code":"CN","type":"education","lineage":["https://openalex.org/I126520041","https://openalex.org/I19820366"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Zerong He","raw_affiliation_strings":["University of Science and Technology of China,Department of Physics"],"affiliations":[{"raw_affiliation_string":"University of Science and Technology of China,Department of Physics","institution_ids":["https://openalex.org/I126520041"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5060373137","display_name":"Linfeng Tao","orcid":null},"institutions":[{"id":"https://openalex.org/I126520041","display_name":"University of Science and Technology of China","ror":"https://ror.org/04c4dkn09","country_code":"CN","type":"education","lineage":["https://openalex.org/I126520041","https://openalex.org/I19820366"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Linfeng Tao","raw_affiliation_strings":["University of Science and Technology of China,Department of Physics"],"affiliations":[{"raw_affiliation_string":"University of Science and Technology of China,Department of Physics","institution_ids":["https://openalex.org/I126520041"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5084537692","display_name":"Xiaotian Wang","orcid":"https://orcid.org/0000-0002-1354-730X"},"institutions":[{"id":"https://openalex.org/I126520041","display_name":"University of Science and Technology of China","ror":"https://ror.org/04c4dkn09","country_code":"CN","type":"education","lineage":["https://openalex.org/I126520041","https://openalex.org/I19820366"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xiaotian Wang","raw_affiliation_strings":["University of Science and Technology of China,Department of Physics"],"affiliations":[{"raw_affiliation_string":"University of Science and Technology of China,Department of Physics","institution_ids":["https://openalex.org/I126520041"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5048395052","display_name":"Letian Zhao","orcid":"https://orcid.org/0009-0001-5295-1831"},"institutions":[{"id":"https://openalex.org/I126520041","display_name":"University of Science and Technology of China","ror":"https://ror.org/04c4dkn09","country_code":"CN","type":"education","lineage":["https://openalex.org/I126520041","https://openalex.org/I19820366"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Letian Zhao","raw_affiliation_strings":["University of Science and Technology of China,Department of Physics"],"affiliations":[{"raw_affiliation_string":"University of Science and Technology of China,Department of Physics","institution_ids":["https://openalex.org/I126520041"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Zhaoxi Zeng","orcid":null},"institutions":[{"id":"https://openalex.org/I126520041","display_name":"University of Science and Technology of China","ror":"https://ror.org/04c4dkn09","country_code":"CN","type":"education","lineage":["https://openalex.org/I126520041","https://openalex.org/I19820366"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Zhaoxi Zeng","raw_affiliation_strings":["University of Science and Technology of China,Department of Physics"],"affiliations":[{"raw_affiliation_string":"University of Science and Technology of China,Department of Physics","institution_ids":["https://openalex.org/I126520041"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5059519578","display_name":"Wei Yuan","orcid":"https://orcid.org/0000-0001-9357-5716"},"institutions":[{"id":"https://openalex.org/I126520041","display_name":"University of Science and Technology of China","ror":"https://ror.org/04c4dkn09","country_code":"CN","type":"education","lineage":["https://openalex.org/I126520041","https://openalex.org/I19820366"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Wei Yuan","raw_affiliation_strings":["University of Science and Technology of China,Department of Physics"],"affiliations":[{"raw_affiliation_string":"University of Science and Technology of China,Department of Physics","institution_ids":["https://openalex.org/I126520041"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5069427023","display_name":"Wei Wu","orcid":"https://orcid.org/0000-0001-7971-2761"},"institutions":[{"id":"https://openalex.org/I126520041","display_name":"University of Science and Technology of China","ror":"https://ror.org/04c4dkn09","country_code":"CN","type":"education","lineage":["https://openalex.org/I126520041","https://openalex.org/I19820366"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Wei Wu","raw_affiliation_strings":["University of Science and Technology of China,Department of Physics"],"affiliations":[{"raw_affiliation_string":"University of Science and Technology of China,Department of Physics","institution_ids":["https://openalex.org/I126520041"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5046555671","display_name":"Xi Jin","orcid":"https://orcid.org/0000-0003-2087-0698"},"institutions":[{"id":"https://openalex.org/I126520041","display_name":"University of Science and Technology of China","ror":"https://ror.org/04c4dkn09","country_code":"CN","type":"education","lineage":["https://openalex.org/I126520041","https://openalex.org/I19820366"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xi Jin","raw_affiliation_strings":["University of Science and Technology of China,Department of Physics"],"affiliations":[{"raw_affiliation_string":"University of Science and Technology of China,Department of Physics","institution_ids":["https://openalex.org/I126520041"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":12,"corresponding_author_ids":["https://openalex.org/A5000968348"],"corresponding_institution_ids":["https://openalex.org/I126520041"],"apc_list":null,"apc_paid":null,"fwci":0.7544,"has_fulltext":false,"cited_by_count":1,"citation_normalized_percentile":{"value":0.70209674,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":91,"max":95},"biblio":{"volume":null,"issue":null,"first_page":"685","last_page":"700"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12702","display_name":"Brain Tumor Detection and Classification","score":0.798799991607666,"subfield":{"id":"https://openalex.org/subfields/2808","display_name":"Neurology"},"field":{"id":"https://openalex.org/fields/28","display_name":"Neuroscience"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},"topics":[{"id":"https://openalex.org/T12702","display_name":"Brain Tumor Detection and Classification","score":0.798799991607666,"subfield":{"id":"https://openalex.org/subfields/2808","display_name":"Neurology"},"field":{"id":"https://openalex.org/fields/28","display_name":"Neuroscience"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},{"id":"https://openalex.org/T12676","display_name":"Machine Learning and ELM","score":0.7853999733924866,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T13674","display_name":"Computer Science and Engineering","score":0.7601000070571899,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/transformation","display_name":"Transformation (genetics)","score":0.7095974087715149},{"id":"https://openalex.org/keywords/dimension","display_name":"Dimension (graph theory)","score":0.693362832069397},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6193950772285461},{"id":"https://openalex.org/keywords/bit","display_name":"Bit (key)","score":0.4722527861595154},{"id":"https://openalex.org/keywords/tensor","display_name":"Tensor (intrinsic definition)","score":0.4671083688735962},{"id":"https://openalex.org/keywords/arithmetic","display_name":"Arithmetic","score":0.44975653290748596},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.2201775312423706},{"id":"https://openalex.org/keywords/computer-network","display_name":"Computer network","score":0.11527761816978455},{"id":"https://openalex.org/keywords/pure-mathematics","display_name":"Pure mathematics","score":0.09461307525634766}],"concepts":[{"id":"https://openalex.org/C204241405","wikidata":"https://www.wikidata.org/wiki/Q461499","display_name":"Transformation (genetics)","level":3,"score":0.7095974087715149},{"id":"https://openalex.org/C33676613","wikidata":"https://www.wikidata.org/wiki/Q13415176","display_name":"Dimension (graph theory)","level":2,"score":0.693362832069397},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6193950772285461},{"id":"https://openalex.org/C117011727","wikidata":"https://www.wikidata.org/wiki/Q1278488","display_name":"Bit (key)","level":2,"score":0.4722527861595154},{"id":"https://openalex.org/C155281189","wikidata":"https://www.wikidata.org/wiki/Q3518150","display_name":"Tensor (intrinsic definition)","level":2,"score":0.4671083688735962},{"id":"https://openalex.org/C94375191","wikidata":"https://www.wikidata.org/wiki/Q11205","display_name":"Arithmetic","level":1,"score":0.44975653290748596},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.2201775312423706},{"id":"https://openalex.org/C31258907","wikidata":"https://www.wikidata.org/wiki/Q1301371","display_name":"Computer network","level":1,"score":0.11527761816978455},{"id":"https://openalex.org/C202444582","wikidata":"https://www.wikidata.org/wiki/Q837863","display_name":"Pure mathematics","level":1,"score":0.09461307525634766},{"id":"https://openalex.org/C104317684","wikidata":"https://www.wikidata.org/wiki/Q7187","display_name":"Gene","level":2,"score":0.0},{"id":"https://openalex.org/C55493867","wikidata":"https://www.wikidata.org/wiki/Q7094","display_name":"Biochemistry","level":1,"score":0.0},{"id":"https://openalex.org/C185592680","wikidata":"https://www.wikidata.org/wiki/Q2329","display_name":"Chemistry","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/hpca61900.2025.00058","is_oa":false,"landing_page_url":"https://doi.org/10.1109/hpca61900.2025.00058","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 IEEE International Symposium on High Performance Computer Architecture (HPCA)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"score":0.8100000023841858,"id":"https://metadata.un.org/sdg/7","display_name":"Affordable and clean energy"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":44,"referenced_works":["https://openalex.org/W1976039195","https://openalex.org/W1983849809","https://openalex.org/W2055119962","https://openalex.org/W2096980603","https://openalex.org/W2097389088","https://openalex.org/W2107088801","https://openalex.org/W2114366942","https://openalex.org/W2136752682","https://openalex.org/W2146301839","https://openalex.org/W2163578701","https://openalex.org/W2236988532","https://openalex.org/W2606722458","https://openalex.org/W2612076670","https://openalex.org/W2791673912","https://openalex.org/W2931118404","https://openalex.org/W2940862705","https://openalex.org/W2943267175","https://openalex.org/W3034176714","https://openalex.org/W3127736057","https://openalex.org/W3132942233","https://openalex.org/W3157657667","https://openalex.org/W3159782774","https://openalex.org/W3207265322","https://openalex.org/W4220662607","https://openalex.org/W4247198796","https://openalex.org/W4288083528","https://openalex.org/W4308083928","https://openalex.org/W4360605374","https://openalex.org/W4360831844","https://openalex.org/W4380874786","https://openalex.org/W4386568792","https://openalex.org/W4391696987","https://openalex.org/W4393406875","https://openalex.org/W4393407042","https://openalex.org/W4400726540","https://openalex.org/W4401212175","https://openalex.org/W4406014880","https://openalex.org/W6729126992","https://openalex.org/W6763456558","https://openalex.org/W6771656455","https://openalex.org/W6859466964","https://openalex.org/W6859486663","https://openalex.org/W6859532484","https://openalex.org/W6861059166"],"related_works":["https://openalex.org/W4327546585","https://openalex.org/W2411923897","https://openalex.org/W4394546135","https://openalex.org/W4285347720","https://openalex.org/W4200259850","https://openalex.org/W2333831899","https://openalex.org/W2484894494","https://openalex.org/W2367385042","https://openalex.org/W4381186982","https://openalex.org/W2040781570"],"abstract_inverted_index":{"General":[0],"matrix-matrix":[1],"multiplication":[2,49,72],"(GEMM),":[3],"serving":[4],"as":[5,17,97],"a":[6,67,74,89,197,202],"cornerstone":[7],"of":[8,47,62,82,127,170],"AI":[9],"computations,":[10],"has":[11],"positioned":[12],"tensor":[13],"processing":[14],"engines":[15],"(TPEs)":[16],"increasingly":[18],"critical":[19],"components":[20],"within":[21],"existing":[22],"GPUs":[23],"and":[24,103,107,116,132,162,177,180,187,209,227],"domain-specific":[25],"architectures":[26,34,152],"(DSA).":[27],"Our":[28,219],"analysis":[29],"identifies":[30],"that":[31,123],"the":[32,45,60,79,113,142],"prevailing":[33],"primarily":[35],"focus":[36],"on":[37,70,78,112],"dataflow":[38],"or":[39],"operand":[40],"reuse":[41],"strategies,":[42],"when":[43],"considering":[44],"combination":[46],"matrix":[48,71,94],"with":[50,224],"multiply-accumulator":[51],"(MAC)":[52],"itself,":[53],"it":[54],"provides":[55],"greater":[56],"optimization":[57,121],"space":[58],"for":[59,105,190,230],"design":[61,138],"TPEs.":[63],"This":[64],"work":[65],"introduces":[66],"novel":[68],"perspective":[69],"from":[73],"hardware":[75],"standpoint,":[76],"focusing":[77],"bit-weight":[80],"dimension":[81],"MACs.":[83],"Through":[84],"this":[85],"lens,":[86],"we":[87,118,165,200],"propose":[88,119],"finer-grained":[90],"TPE":[91,151],"notation,":[92],"using":[93,141],"triple":[95],"loops":[96],"an":[98],"example,":[99],"introducing":[100],"new":[101,114],"methods":[102,147],"ideas":[104],"designing":[106],"optimizing":[108],"PE":[109],"microarchitecture.":[110],"Based":[111],"notation":[115],"transformations,":[117],"four":[120,149],"techniques":[122],"achieve":[124],"varying":[125],"degrees":[126],"improvement":[128,205],"in":[129,139,206,212,233],"timing,":[130,225],"area,":[131,226],"power":[133,228],"consumption.":[134],"We":[135],"implement":[136],"our":[137,146],"RTL":[140],"SMIC-28nm":[143],"process.":[144],"Applying":[145],"to":[148,196,216],"classic":[150],"(include":[153],"systolic":[154],"array":[155],"[20],":[156],"3D-Cube":[157],"[27],":[158],"multiplier-adder":[159],"tree":[160],"[48],":[161],"2D-Matrix":[163],"[30]),":[164],"achieved":[166,201],"area":[167,213],"efficiency":[168,192,208,214],"improvements":[169],"$1.27":[171],"\\times,":[172,174,182,184],"1.28":[173],"1.56":[175,183],"\\times$,":[176,179,186],"$1.44":[178],"$1.04":[181],"1.49":[185],"$1.20":[188],"\\times$":[189,204,211],"energy":[191,207],"respectively.":[193],"When":[194],"applied":[195],"bit-slice":[198],"architecture,":[199],"$12.10":[203],"$2.85":[210],"compared":[215],"Laconic":[217],"[38].":[218],"Verilog":[220],"HDL":[221],"code,":[222],"along":[223],"reports":[229],"circuit":[231],"synthesis":[232],"URL:":[234],"https://github.com/wqzustc/High-Performance-Tensor-Processing-Engines.":[235]},"counts_by_year":[{"year":2025,"cited_by_count":1}],"updated_date":"2026-04-21T08:09:41.155169","created_date":"2025-10-10T00:00:00"}
