{"id":"https://openalex.org/W4407953517","doi":"https://doi.org/10.1145/3706628.3708867","title":"Systolic Sparse Tensor Slices: FPGA Building Blocks for Sparse and Dense AI Acceleration","display_name":"Systolic Sparse Tensor Slices: FPGA Building Blocks for Sparse and Dense AI Acceleration","publication_year":2025,"publication_date":"2025-02-26","ids":{"openalex":"https://openalex.org/W4407953517","doi":"https://doi.org/10.1145/3706628.3708867"},"language":"en","primary_location":{"id":"doi:10.1145/3706628.3708867","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3706628.3708867","pdf_url":null,"source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2025 ACM/SIGDA International Symposium on Field Programmable Gate Arrays","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://doi.org/10.1145/3706628.3708867","any_repository_has_fulltext":null},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5084140879","display_name":"Endri Taka","orcid":null},"institutions":[{"id":"https://openalex.org/I86519309","display_name":"The University of Texas at Austin","ror":"https://ror.org/00hj54h04","country_code":"US","type":"education","lineage":["https://openalex.org/I86519309"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Endri Taka","raw_affiliation_strings":["The University of Texas at Austin, Austin, TX, USA"],"affiliations":[{"raw_affiliation_string":"The University of Texas at Austin, Austin, TX, USA","institution_ids":["https://openalex.org/I86519309"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5036262299","display_name":"Ning-Chi Huang","orcid":"https://orcid.org/0000-0003-4663-9099"},"institutions":[{"id":"https://openalex.org/I148366613","display_name":"National Yang Ming Chiao Tung University","ror":"https://ror.org/00se2k293","country_code":"TW","type":"education","lineage":["https://openalex.org/I148366613"]}],"countries":["TW"],"is_corresponding":false,"raw_author_name":"Ning-Chi Huang","raw_affiliation_strings":["National Yang Ming Chiao Tung University, Hsinchu, Taiwan"],"affiliations":[{"raw_affiliation_string":"National Yang Ming Chiao Tung University, Hsinchu, Taiwan","institution_ids":["https://openalex.org/I148366613"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5111733211","display_name":"C. Chang","orcid":null},"institutions":[{"id":"https://openalex.org/I148366613","display_name":"National Yang Ming Chiao Tung University","ror":"https://ror.org/00se2k293","country_code":"TW","type":"education","lineage":["https://openalex.org/I148366613"]}],"countries":["TW"],"is_corresponding":false,"raw_author_name":"Chi-Chih Chang","raw_affiliation_strings":["National Yang Ming Chiao Tung University, Hsinchu, Taiwan"],"affiliations":[{"raw_affiliation_string":"National Yang Ming Chiao Tung University, Hsinchu, Taiwan","institution_ids":["https://openalex.org/I148366613"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5102963673","display_name":"K. H. Wu","orcid":"https://orcid.org/0009-0000-6931-6538"},"institutions":[{"id":"https://openalex.org/I148366613","display_name":"National Yang Ming Chiao Tung University","ror":"https://ror.org/00se2k293","country_code":"TW","type":"education","lineage":["https://openalex.org/I148366613"]}],"countries":["TW"],"is_corresponding":false,"raw_author_name":"Kai-Chiang Wu","raw_affiliation_strings":["National Yang Ming Chiao Tung University, Hsinchu, Taiwan"],"affiliations":[{"raw_affiliation_string":"National Yang Ming Chiao Tung University, Hsinchu, Taiwan","institution_ids":["https://openalex.org/I148366613"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5045858420","display_name":"Aman Arora","orcid":"https://orcid.org/0000-0003-2547-4424"},"institutions":[{"id":"https://openalex.org/I55732556","display_name":"Arizona State University","ror":"https://ror.org/03efmqc40","country_code":"US","type":"education","lineage":["https://openalex.org/I55732556"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Aman Arora","raw_affiliation_strings":["Arizona State University, Tempe, AZ, USA"],"affiliations":[{"raw_affiliation_string":"Arizona State University, Tempe, AZ, USA","institution_ids":["https://openalex.org/I55732556"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5065985595","display_name":"Diana Marculescu","orcid":"https://orcid.org/0000-0002-5734-4221"},"institutions":[{"id":"https://openalex.org/I86519309","display_name":"The University of Texas at Austin","ror":"https://ror.org/00hj54h04","country_code":"US","type":"education","lineage":["https://openalex.org/I86519309"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Diana Marculescu","raw_affiliation_strings":["The University of Texas at Austin, Austin, TX, USA"],"affiliations":[{"raw_affiliation_string":"The University of Texas at Austin, Austin, TX, USA","institution_ids":["https://openalex.org/I86519309"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5084140879"],"corresponding_institution_ids":["https://openalex.org/I86519309"],"apc_list":null,"apc_paid":null,"fwci":19.622,"has_fulltext":false,"cited_by_count":8,"citation_normalized_percentile":{"value":0.99422691,"is_in_top_1_percent":true,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":98,"max":100},"biblio":{"volume":null,"issue":null,"first_page":"159","last_page":"171"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.9969000220298767,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.9969000220298767,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10904","display_name":"Embedded Systems Design Techniques","score":0.9872000217437744,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12303","display_name":"Tensor decomposition and applications","score":0.9771000146865845,"subfield":{"id":"https://openalex.org/subfields/2605","display_name":"Computational Mathematics"},"field":{"id":"https://openalex.org/fields/26","display_name":"Mathematics"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/acceleration","display_name":"Acceleration","score":0.7640771865844727},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6362224221229553},{"id":"https://openalex.org/keywords/field-programmable-gate-array","display_name":"Field-programmable gate array","score":0.6338492631912231},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.6323666572570801},{"id":"https://openalex.org/keywords/systolic-array","display_name":"Systolic array","score":0.5965029001235962},{"id":"https://openalex.org/keywords/tensor","display_name":"Tensor (intrinsic definition)","score":0.5555849671363831},{"id":"https://openalex.org/keywords/computational-science","display_name":"Computational science","score":0.5095446705818176},{"id":"https://openalex.org/keywords/sparse-matrix","display_name":"Sparse matrix","score":0.4690081775188446},{"id":"https://openalex.org/keywords/embedded-system","display_name":"Embedded system","score":0.1941339075565338},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.17354369163513184},{"id":"https://openalex.org/keywords/physics","display_name":"Physics","score":0.14435496926307678},{"id":"https://openalex.org/keywords/geometry","display_name":"Geometry","score":0.06586116552352905}],"concepts":[{"id":"https://openalex.org/C117896860","wikidata":"https://www.wikidata.org/wiki/Q11376","display_name":"Acceleration","level":2,"score":0.7640771865844727},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6362224221229553},{"id":"https://openalex.org/C42935608","wikidata":"https://www.wikidata.org/wiki/Q190411","display_name":"Field-programmable gate array","level":2,"score":0.6338492631912231},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.6323666572570801},{"id":"https://openalex.org/C150741067","wikidata":"https://www.wikidata.org/wiki/Q2377218","display_name":"Systolic array","level":3,"score":0.5965029001235962},{"id":"https://openalex.org/C155281189","wikidata":"https://www.wikidata.org/wiki/Q3518150","display_name":"Tensor (intrinsic definition)","level":2,"score":0.5555849671363831},{"id":"https://openalex.org/C459310","wikidata":"https://www.wikidata.org/wiki/Q117801","display_name":"Computational science","level":1,"score":0.5095446705818176},{"id":"https://openalex.org/C56372850","wikidata":"https://www.wikidata.org/wiki/Q1050404","display_name":"Sparse matrix","level":3,"score":0.4690081775188446},{"id":"https://openalex.org/C149635348","wikidata":"https://www.wikidata.org/wiki/Q193040","display_name":"Embedded system","level":1,"score":0.1941339075565338},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.17354369163513184},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.14435496926307678},{"id":"https://openalex.org/C2524010","wikidata":"https://www.wikidata.org/wiki/Q8087","display_name":"Geometry","level":1,"score":0.06586116552352905},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C163716315","wikidata":"https://www.wikidata.org/wiki/Q901177","display_name":"Gaussian","level":2,"score":0.0},{"id":"https://openalex.org/C74650414","wikidata":"https://www.wikidata.org/wiki/Q11397","display_name":"Classical mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C14580979","wikidata":"https://www.wikidata.org/wiki/Q876049","display_name":"Very-large-scale integration","level":2,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3706628.3708867","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3706628.3708867","pdf_url":null,"source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2025 ACM/SIGDA International Symposium on Field Programmable Gate Arrays","raw_type":"proceedings-article"}],"best_oa_location":{"id":"doi:10.1145/3706628.3708867","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3706628.3708867","pdf_url":null,"source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2025 ACM/SIGDA International Symposium on Field Programmable Gate Arrays","raw_type":"proceedings-article"},"sustainable_development_goals":[{"score":0.6899999976158142,"display_name":"Sustainable cities and communities","id":"https://metadata.un.org/sdg/11"}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":60,"referenced_works":["https://openalex.org/W2017369466","https://openalex.org/W2095258817","https://openalex.org/W2346205343","https://openalex.org/W2515080096","https://openalex.org/W2588191434","https://openalex.org/W2625954420","https://openalex.org/W2773750423","https://openalex.org/W2794952988","https://openalex.org/W2911751195","https://openalex.org/W2914304175","https://openalex.org/W2915106038","https://openalex.org/W2945146780","https://openalex.org/W2949275038","https://openalex.org/W2949619037","https://openalex.org/W2949660525","https://openalex.org/W2950656546","https://openalex.org/W2953212265","https://openalex.org/W2979310060","https://openalex.org/W2979439447","https://openalex.org/W2980186997","https://openalex.org/W2990185852","https://openalex.org/W3012178976","https://openalex.org/W3033033241","https://openalex.org/W3093571597","https://openalex.org/W3097528158","https://openalex.org/W3105753409","https://openalex.org/W3115348505","https://openalex.org/W3127736057","https://openalex.org/W3129093240","https://openalex.org/W3129734321","https://openalex.org/W3131304503","https://openalex.org/W3159322265","https://openalex.org/W3164217046","https://openalex.org/W3166559827","https://openalex.org/W3185702163","https://openalex.org/W3190062760","https://openalex.org/W3207820301","https://openalex.org/W4200369086","https://openalex.org/W4213019189","https://openalex.org/W4224010033","https://openalex.org/W4240168186","https://openalex.org/W4243682116","https://openalex.org/W4245683599","https://openalex.org/W4285130446","https://openalex.org/W4286001027","https://openalex.org/W4287363917","https://openalex.org/W4291653336","https://openalex.org/W4312443924","https://openalex.org/W4319870541","https://openalex.org/W4360831840","https://openalex.org/W4377864779","https://openalex.org/W4388923662","https://openalex.org/W4391455288","https://openalex.org/W4391455291","https://openalex.org/W4401568656","https://openalex.org/W4402148001","https://openalex.org/W4402196533","https://openalex.org/W4402196566","https://openalex.org/W4402917213","https://openalex.org/W4405644157"],"related_works":["https://openalex.org/W4240320454","https://openalex.org/W2070314832","https://openalex.org/W2395557210","https://openalex.org/W2111241003","https://openalex.org/W2347854075","https://openalex.org/W2132614232","https://openalex.org/W4321636545","https://openalex.org/W4316252394","https://openalex.org/W2072127800","https://openalex.org/W2023476765"],"abstract_inverted_index":{"FPGA":[0,22,61,138],"architectures":[1],"have":[2,27],"recently":[3],"been":[4],"enhanced":[5],"to":[6,81,108,135,145,163,172,175],"meet":[7],"the":[8,122,151],"substantial":[9],"computational":[10],"demands":[11],"of":[12,79,87,124,150,170],"modern":[13],"deep":[14],"neural":[15],"networks":[16],"(DNNs).":[17],"To":[18,50],"this":[19,52],"end,":[20],"both":[21],"vendors":[23],"and":[24,95,140,158],"academic":[25],"researchers":[26],"proposed":[28,152],"in-fabric":[29,67,177],"blocks":[30,38],"that":[31,75],"perform":[32],"efficient":[33],"tensor":[34,72],"computations.":[35],"However,":[36],"these":[37],"are":[39,121],"primarily":[40],"optimized":[41],"for":[42],"dense":[43,91,176],"computation,":[44],"while":[45],"most":[46,125],"DNNs":[47],"exhibit":[48],"sparsity.":[49],"address":[51],"limitation,":[53],"we":[54],"propose":[55],"incorporating":[56],"structured":[57],"sparsity":[58,80,106],"support":[59,76,90],"into":[60],"architectures.":[62],"We":[63],"architect":[64],"2D":[65],"systolic":[66,70],"blocks,":[68],"named":[69],"sparse":[71,130,156],"(SST)":[73],"slices,":[74],"multiple":[77],"degrees":[78],"efficiently":[82],"accelerate":[83],"a":[84,102],"wide":[85],"variety":[86],"DNNs.":[88],"SSTs":[89,153],"operation,":[92],"2:4":[93],"(50%)":[94],"1:4":[96],"(75%)":[97],"sparsity,":[98],"as":[99,101],"well":[100],"new":[103],"1:3":[104],"(66.7%)":[105],"level":[107],"further":[109],"increase":[110,169],"flexibility.":[111],"When":[112],"demonstrating":[113],"on":[114,154],"general":[115],"matrix":[116],"multiplication":[117],"(GEMM)":[118],"accelerators,":[119,128],"which":[120],"heart":[123],"current":[126],"DNN":[127],"our":[129],"SST-based":[131],"designs":[132],"attain":[133],"up":[134,162,171],"5\u00d7":[136],"higher":[137],"frequency":[139],"10.9\u00d7":[141],"lower":[142],"area,":[143],"compared":[144,174],"traditional":[146],"FPGAs.":[147],"Moreover,":[148],"evaluation":[149],"state-of-the-art":[155],"ViT":[157],"CNN":[159],"models":[160],"exhibits":[161],"3.52\u00d7":[164],"speedup":[165],"with":[166],"minimal":[167],"area":[168],"13.3%,":[173],"acceleration.":[178]},"counts_by_year":[{"year":2026,"cited_by_count":2},{"year":2025,"cited_by_count":6}],"updated_date":"2025-12-21T23:12:01.093139","created_date":"2025-10-10T00:00:00"}
