{"id":"https://openalex.org/W4414198373","doi":"https://doi.org/10.1109/dac63849.2025.11133069","title":"DenSparSA: A Balanced Systolic Array Approach for Dense and Sparse Matrix Multiplication","display_name":"DenSparSA: A Balanced Systolic Array Approach for Dense and Sparse Matrix Multiplication","publication_year":2025,"publication_date":"2025-06-22","ids":{"openalex":"https://openalex.org/W4414198373","doi":"https://doi.org/10.1109/dac63849.2025.11133069"},"language":"en","primary_location":{"id":"doi:10.1109/dac63849.2025.11133069","is_oa":false,"landing_page_url":"https://doi.org/10.1109/dac63849.2025.11133069","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 62nd ACM/IEEE Design Automation Conference (DAC)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5100717657","display_name":"Ziheng Wang","orcid":"https://orcid.org/0000-0001-5064-2376"},"institutions":[{"id":"https://openalex.org/I183067930","display_name":"Shanghai Jiao Tong University","ror":"https://ror.org/0220qvk04","country_code":"CN","type":"education","lineage":["https://openalex.org/I183067930"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Ziheng Wang","raw_affiliation_strings":["Shanghai Jiao Tong University"],"affiliations":[{"raw_affiliation_string":"Shanghai Jiao Tong University","institution_ids":["https://openalex.org/I183067930"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101995440","display_name":"Ruiqi Sun","orcid":"https://orcid.org/0000-0002-5433-2380"},"institutions":[{"id":"https://openalex.org/I183067930","display_name":"Shanghai Jiao Tong University","ror":"https://ror.org/0220qvk04","country_code":"CN","type":"education","lineage":["https://openalex.org/I183067930"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Ruiqi Sun","raw_affiliation_strings":["Shanghai Jiao Tong University"],"affiliations":[{"raw_affiliation_string":"Shanghai Jiao Tong University","institution_ids":["https://openalex.org/I183067930"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101635425","display_name":"Xin He","orcid":"https://orcid.org/0000-0001-9280-1208"},"institutions":[{"id":"https://openalex.org/I27837315","display_name":"University of Michigan\u2013Ann Arbor","ror":"https://ror.org/00jmfr291","country_code":"US","type":"education","lineage":["https://openalex.org/I27837315"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Xin He","raw_affiliation_strings":["University of Michigan"],"affiliations":[{"raw_affiliation_string":"University of Michigan","institution_ids":["https://openalex.org/I27837315"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5075420600","display_name":"Tianrui Ma","orcid":"https://orcid.org/0000-0003-0894-6653"},"institutions":[{"id":"https://openalex.org/I4210090176","display_name":"Institute of Computing Technology","ror":"https://ror.org/0090r4d87","country_code":"CN","type":"facility","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210090176"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Tianrui Ma","raw_affiliation_strings":["Institute of Computing Technology, CAS"],"affiliations":[{"raw_affiliation_string":"Institute of Computing Technology, CAS","institution_ids":["https://openalex.org/I4210090176"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5090386055","display_name":"An Zou","orcid":"https://orcid.org/0000-0002-0083-5281"},"institutions":[{"id":"https://openalex.org/I183067930","display_name":"Shanghai Jiao Tong University","ror":"https://ror.org/0220qvk04","country_code":"CN","type":"education","lineage":["https://openalex.org/I183067930"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"An Zou","raw_affiliation_strings":["Shanghai Jiao Tong University"],"affiliations":[{"raw_affiliation_string":"Shanghai Jiao Tong University","institution_ids":["https://openalex.org/I183067930"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5100717657"],"corresponding_institution_ids":["https://openalex.org/I183067930"],"apc_list":null,"apc_paid":null,"fwci":2.5505,"has_fulltext":false,"cited_by_count":1,"citation_normalized_percentile":{"value":0.89578243,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":91,"max":95},"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"7"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.996399998664856,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.996399998664856,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10829","display_name":"Interconnection Networks and Systems","score":0.9944000244140625,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12162","display_name":"Cellular Automata and Applications","score":0.9926999807357788,"subfield":{"id":"https://openalex.org/subfields/1703","display_name":"Computational Theory and Mathematics"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/sparse-matrix","display_name":"Sparse matrix","score":0.7429999709129333},{"id":"https://openalex.org/keywords/overhead","display_name":"Overhead (engineering)","score":0.7208999991416931},{"id":"https://openalex.org/keywords/systolic-array","display_name":"Systolic array","score":0.703000009059906},{"id":"https://openalex.org/keywords/speedup","display_name":"Speedup","score":0.6378999948501587},{"id":"https://openalex.org/keywords/matrix-multiplication","display_name":"Matrix multiplication","score":0.5965999960899353},{"id":"https://openalex.org/keywords/matrix","display_name":"Matrix (chemical analysis)","score":0.5846999883651733},{"id":"https://openalex.org/keywords/sparse-array","display_name":"Sparse array","score":0.5160999894142151},{"id":"https://openalex.org/keywords/power","display_name":"Power (physics)","score":0.5012999773025513}],"concepts":[{"id":"https://openalex.org/C56372850","wikidata":"https://www.wikidata.org/wiki/Q1050404","display_name":"Sparse matrix","level":3,"score":0.7429999709129333},{"id":"https://openalex.org/C2779960059","wikidata":"https://www.wikidata.org/wiki/Q7113681","display_name":"Overhead (engineering)","level":2,"score":0.7208999991416931},{"id":"https://openalex.org/C150741067","wikidata":"https://www.wikidata.org/wiki/Q2377218","display_name":"Systolic array","level":3,"score":0.703000009059906},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6582000255584717},{"id":"https://openalex.org/C68339613","wikidata":"https://www.wikidata.org/wiki/Q1549489","display_name":"Speedup","level":2,"score":0.6378999948501587},{"id":"https://openalex.org/C17349429","wikidata":"https://www.wikidata.org/wiki/Q1049914","display_name":"Matrix multiplication","level":3,"score":0.5965999960899353},{"id":"https://openalex.org/C106487976","wikidata":"https://www.wikidata.org/wiki/Q685816","display_name":"Matrix (chemical analysis)","level":2,"score":0.5846999883651733},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.5328999757766724},{"id":"https://openalex.org/C145177509","wikidata":"https://www.wikidata.org/wiki/Q1050404","display_name":"Sparse array","level":2,"score":0.5160999894142151},{"id":"https://openalex.org/C163258240","wikidata":"https://www.wikidata.org/wiki/Q25342","display_name":"Power (physics)","level":2,"score":0.5012999773025513},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.4749999940395355},{"id":"https://openalex.org/C124066611","wikidata":"https://www.wikidata.org/wiki/Q28684319","display_name":"Sparse approximation","level":2,"score":0.4472000002861023},{"id":"https://openalex.org/C2780595030","wikidata":"https://www.wikidata.org/wiki/Q3860309","display_name":"Multiplication (music)","level":2,"score":0.42809998989105225},{"id":"https://openalex.org/C42355184","wikidata":"https://www.wikidata.org/wiki/Q1361088","display_name":"Matrix decomposition","level":3,"score":0.396699994802475},{"id":"https://openalex.org/C98045186","wikidata":"https://www.wikidata.org/wiki/Q205663","display_name":"Process (computing)","level":2,"score":0.39169999957084656},{"id":"https://openalex.org/C45374587","wikidata":"https://www.wikidata.org/wiki/Q12525525","display_name":"Computation","level":2,"score":0.3553999960422516},{"id":"https://openalex.org/C33343441","wikidata":"https://www.wikidata.org/wiki/Q806350","display_name":"Band matrix","level":5,"score":0.33719998598098755},{"id":"https://openalex.org/C118993495","wikidata":"https://www.wikidata.org/wiki/Q5042828","display_name":"Electrical efficiency","level":3,"score":0.30300000309944153},{"id":"https://openalex.org/C9390403","wikidata":"https://www.wikidata.org/wiki/Q3966","display_name":"Computer hardware","level":1,"score":0.2800999879837036},{"id":"https://openalex.org/C459310","wikidata":"https://www.wikidata.org/wiki/Q117801","display_name":"Computational science","level":1,"score":0.26980000734329224},{"id":"https://openalex.org/C2776639384","wikidata":"https://www.wikidata.org/wiki/Q840396","display_name":"Ideal (ethics)","level":2,"score":0.26570001244544983}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/dac63849.2025.11133069","is_oa":false,"landing_page_url":"https://doi.org/10.1109/dac63849.2025.11133069","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 62nd ACM/IEEE Design Automation Conference (DAC)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":17,"referenced_works":["https://openalex.org/W1561605110","https://openalex.org/W1735172278","https://openalex.org/W2119925642","https://openalex.org/W2606722458","https://openalex.org/W2625954420","https://openalex.org/W3016542674","https://openalex.org/W3036878841","https://openalex.org/W3040024858","https://openalex.org/W3047872496","https://openalex.org/W3090912412","https://openalex.org/W3130920634","https://openalex.org/W3134495297","https://openalex.org/W3167210698","https://openalex.org/W4316252394","https://openalex.org/W4320067931","https://openalex.org/W4320712936","https://openalex.org/W4401212175"],"related_works":[],"abstract_inverted_index":{"Numerous":[0],"studies":[1],"have":[2],"proposed":[3,142],"hardware":[4,112],"architectures":[5],"to":[6,44,62,88,134,162,166,191],"accelerate":[7],"sparse":[8,45,57,82,100,172,206,216],"matrix":[9,39,83,91,101,136],"multiplication,":[10],"but":[11,41],"these":[12],"approaches":[13],"often":[14],"incur":[15],"substantial":[16],"area":[17,178],"and":[18,56,65,97,119,179,197,218],"power":[19,129,180,186],"overhead,":[20],"significantly":[21],"compromising":[22],"their":[23,42],"usage":[24],"in":[25,215],"dense":[26,38,55,90,135,183,225],"scenarios.":[27],"On":[28],"the":[29,107,110,122,167,185],"other":[30],"hand,":[31],"systolic":[32,75,124,169],"arrays":[33],"deliver":[34],"high":[35,60,104],"efficiency":[36,61,214,223],"for":[37,114,127,171,195,199,205,224],"operations,":[40],"application":[43],"matrices":[46,58],"remains":[47],"challenging.":[48],"An":[49],"ideal":[50],"design":[51,143],"should":[52],"process":[53],"both":[54,95,232],"with":[59,85,103,146,202],"satisfy":[63],"performance":[64],"versatility":[66],"requirements.In":[67],"this":[68],"paper,":[69],"we":[70],"introduce":[71],"DenSparSA,":[72],"a":[73,156,228],"balanced":[74],"array":[76,170],"centralized":[77],"architecture":[78],"that":[79,153],"can":[80,188],"execute":[81],"computations":[84],"minimal":[86,128],"overhead":[87,130,187],"original":[89],"computations.":[92],"DenSparSA":[93,154,208],"supports":[94],"single-side":[96],"dual-side":[98],"unstructured":[99],"multiplications":[102],"efficiency.":[105],"At":[106],"same":[108],"time,":[109],"additional":[111],"required":[113],"managing":[115],"sparsity":[116],"is":[117,144],"compact":[118],"decoupled":[120],"from":[121,159],"conventional":[123],"array,":[125],"allowing":[126],"when":[131],"switched":[132],"back":[133],"operations":[137],"via":[138],"circuit":[139],"gating.":[140],"The":[141],"implemented":[145],"Nangate":[147],"45":[148],"nm.":[149],"Implementation":[150],"results":[151],"show":[152],"achieves":[155],"speedup":[157],"ranging":[158],"$1.9":[160],"\\times$":[161,164,221],"$22":[163],"compared":[165],"classic":[168],"workloads,":[173,184],"while":[174],"maintaining":[175],"relatively":[176],"low":[177],"overhead.":[181],"For":[182],"be":[189],"reduced":[190],"$\\mathbf{1":[192],"2":[193],"\\%}$":[194],"BF16":[196],"5%":[198],"FP32.":[200],"Compared":[201],"existing":[203],"solutions":[204],"acceleration,":[207],"delivers":[209],"competitive":[210],"($0.82":[211],"\\times-1.32":[212],"\\times$)":[213],"scenarios":[217],"$1.17":[219],"\\times-2.28":[220],"better":[222,229],"scenarios,":[226],"indicating":[227],"balance":[230],"between":[231],"situations.":[233]},"counts_by_year":[{"year":2025,"cited_by_count":1}],"updated_date":"2026-03-07T16:01:11.037858","created_date":"2025-10-10T00:00:00"}
