{"id":"https://openalex.org/W4401016977","doi":"https://doi.org/10.1109/tcad.2024.3434359","title":"SPSA: Exploring Sparse-Packing Computation on Systolic Arrays From Scratch","display_name":"SPSA: Exploring Sparse-Packing Computation on Systolic Arrays From Scratch","publication_year":2024,"publication_date":"2024-07-26","ids":{"openalex":"https://openalex.org/W4401016977","doi":"https://doi.org/10.1109/tcad.2024.3434359"},"language":"en","primary_location":{"id":"doi:10.1109/tcad.2024.3434359","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tcad.2024.3434359","pdf_url":null,"source":{"id":"https://openalex.org/S100835903","display_name":"IEEE Transactions on Computer-Aided Design of Integrated Circuits and Systems","issn_l":"0278-0070","issn":["0278-0070","1937-4151"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Computer-Aided Design of Integrated Circuits and Systems","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5072361503","display_name":"Minjin Tang","orcid":"https://orcid.org/0009-0000-2254-4912"},"institutions":[{"id":"https://openalex.org/I170215575","display_name":"National University of Defense Technology","ror":"https://ror.org/05d2yfz11","country_code":"CN","type":"education","lineage":["https://openalex.org/I170215575"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Minjin Tang","raw_affiliation_strings":["Key Laboratory of Advanced Microprocessor Chips and Systems, National University of Defense Technology, Changsha, China","National University of Defense Technology, Key Laboratory of Advanced Microprocessor Chips and Systems, China"],"affiliations":[{"raw_affiliation_string":"Key Laboratory of Advanced Microprocessor Chips and Systems, National University of Defense Technology, Changsha, China","institution_ids":["https://openalex.org/I170215575"]},{"raw_affiliation_string":"National University of Defense Technology, Key Laboratory of Advanced Microprocessor Chips and Systems, China","institution_ids":["https://openalex.org/I170215575"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101937502","display_name":"Mei Wen","orcid":"https://orcid.org/0000-0002-5875-3297"},"institutions":[{"id":"https://openalex.org/I170215575","display_name":"National University of Defense Technology","ror":"https://ror.org/05d2yfz11","country_code":"CN","type":"education","lineage":["https://openalex.org/I170215575"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Mei Wen","raw_affiliation_strings":["Key Laboratory of Advanced Microprocessor Chips and Systems, National University of Defense Technology, Changsha, China","National University of Defense Technology, Key Laboratory of Advanced Microprocessor Chips and Systems, China"],"affiliations":[{"raw_affiliation_string":"Key Laboratory of Advanced Microprocessor Chips and Systems, National University of Defense Technology, Changsha, China","institution_ids":["https://openalex.org/I170215575"]},{"raw_affiliation_string":"National University of Defense Technology, Key Laboratory of Advanced Microprocessor Chips and Systems, China","institution_ids":["https://openalex.org/I170215575"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100381753","display_name":"Shuicheng Yan","orcid":"https://orcid.org/0000-0001-8906-3777"},"institutions":[{"id":"https://openalex.org/I170215575","display_name":"National University of Defense Technology","ror":"https://ror.org/05d2yfz11","country_code":"CN","type":"education","lineage":["https://openalex.org/I170215575"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jianchao Yang","raw_affiliation_strings":["Key Laboratory of Advanced Microprocessor Chips and Systems, National University of Defense Technology, Changsha, China","National University of Defense Technology, Key Laboratory of Advanced Microprocessor Chips and Systems, China"],"affiliations":[{"raw_affiliation_string":"Key Laboratory of Advanced Microprocessor Chips and Systems, National University of Defense Technology, Changsha, China","institution_ids":["https://openalex.org/I170215575"]},{"raw_affiliation_string":"National University of Defense Technology, Key Laboratory of Advanced Microprocessor Chips and Systems, China","institution_ids":["https://openalex.org/I170215575"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5103112899","display_name":"Zeyu Xue","orcid":"https://orcid.org/0009-0007-6374-2916"},"institutions":[{"id":"https://openalex.org/I170215575","display_name":"National University of Defense Technology","ror":"https://ror.org/05d2yfz11","country_code":"CN","type":"education","lineage":["https://openalex.org/I170215575"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Zeyu Xue","raw_affiliation_strings":["Key Laboratory of Advanced Microprocessor Chips and Systems, National University of Defense Technology, Changsha, China","National University of Defense Technology, Key Laboratory of Advanced Microprocessor Chips and Systems, China"],"affiliations":[{"raw_affiliation_string":"Key Laboratory of Advanced Microprocessor Chips and Systems, National University of Defense Technology, Changsha, China","institution_ids":["https://openalex.org/I170215575"]},{"raw_affiliation_string":"National University of Defense Technology, Key Laboratory of Advanced Microprocessor Chips and Systems, China","institution_ids":["https://openalex.org/I170215575"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5037428152","display_name":"Junzhong Shen","orcid":"https://orcid.org/0000-0001-6233-6800"},"institutions":[{"id":"https://openalex.org/I170215575","display_name":"National University of Defense Technology","ror":"https://ror.org/05d2yfz11","country_code":"CN","type":"education","lineage":["https://openalex.org/I170215575"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Junzhong Shen","raw_affiliation_strings":["Key Laboratory of Advanced Microprocessor Chips and Systems, National University of Defense Technology, Changsha, China","National University of Defense Technology, Key Laboratory of Advanced Microprocessor Chips and Systems, China"],"affiliations":[{"raw_affiliation_string":"Key Laboratory of Advanced Microprocessor Chips and Systems, National University of Defense Technology, Changsha, China","institution_ids":["https://openalex.org/I170215575"]},{"raw_affiliation_string":"National University of Defense Technology, Key Laboratory of Advanced Microprocessor Chips and Systems, China","institution_ids":["https://openalex.org/I170215575"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5072361503"],"corresponding_institution_ids":["https://openalex.org/I170215575"],"apc_list":null,"apc_paid":null,"fwci":0.4342,"has_fulltext":false,"cited_by_count":2,"citation_normalized_percentile":{"value":0.61703252,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":91,"max":99},"biblio":{"volume":"44","issue":"2","first_page":"497","last_page":"511"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11522","display_name":"VLSI and FPGA Design Techniques","score":0.9948999881744385,"subfield":{"id":"https://openalex.org/subfields/2208","display_name":"Electrical and Electronic Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11522","display_name":"VLSI and FPGA Design Techniques","score":0.9948999881744385,"subfield":{"id":"https://openalex.org/subfields/2208","display_name":"Electrical and Electronic Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10829","display_name":"Interconnection Networks and Systems","score":0.9940999746322632,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10904","display_name":"Embedded Systems Design Techniques","score":0.9922999739646912,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/scratch","display_name":"Scratch","score":0.7015694975852966},{"id":"https://openalex.org/keywords/computation","display_name":"Computation","score":0.6543798446655273},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.6088143587112427},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.5354725122451782},{"id":"https://openalex.org/keywords/computational-science","display_name":"Computational science","score":0.3886975646018982},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.23784637451171875},{"id":"https://openalex.org/keywords/operating-system","display_name":"Operating system","score":0.11029517650604248}],"concepts":[{"id":"https://openalex.org/C2781235140","wikidata":"https://www.wikidata.org/wiki/Q275131","display_name":"Scratch","level":2,"score":0.7015694975852966},{"id":"https://openalex.org/C45374587","wikidata":"https://www.wikidata.org/wiki/Q12525525","display_name":"Computation","level":2,"score":0.6543798446655273},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.6088143587112427},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.5354725122451782},{"id":"https://openalex.org/C459310","wikidata":"https://www.wikidata.org/wiki/Q117801","display_name":"Computational science","level":1,"score":0.3886975646018982},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.23784637451171875},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.11029517650604248}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/tcad.2024.3434359","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tcad.2024.3434359","pdf_url":null,"source":{"id":"https://openalex.org/S100835903","display_name":"IEEE Transactions on Computer-Aided Design of Integrated Circuits and Systems","issn_l":"0278-0070","issn":["0278-0070","1937-4151"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Computer-Aided Design of Integrated Circuits and Systems","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":38,"referenced_works":["https://openalex.org/W182691100","https://openalex.org/W1506342804","https://openalex.org/W2017369466","https://openalex.org/W2035080386","https://openalex.org/W2585720638","https://openalex.org/W2606722458","https://openalex.org/W2625457103","https://openalex.org/W2773750423","https://openalex.org/W2794952988","https://openalex.org/W2900228909","https://openalex.org/W2904902077","https://openalex.org/W2906043559","https://openalex.org/W2915106038","https://openalex.org/W2945146780","https://openalex.org/W2950656546","https://openalex.org/W2970106668","https://openalex.org/W2979310060","https://openalex.org/W2979439447","https://openalex.org/W2980186997","https://openalex.org/W3016542674","https://openalex.org/W3016832937","https://openalex.org/W3040024858","https://openalex.org/W3092156174","https://openalex.org/W3100023286","https://openalex.org/W3102587717","https://openalex.org/W3103168911","https://openalex.org/W3105802176","https://openalex.org/W3121828480","https://openalex.org/W3130660608","https://openalex.org/W3185702163","https://openalex.org/W3206453033","https://openalex.org/W3208099998","https://openalex.org/W4224612674","https://openalex.org/W4312240403","https://openalex.org/W4316252394","https://openalex.org/W4321636621","https://openalex.org/W4361732751","https://openalex.org/W6772230799"],"related_works":["https://openalex.org/W4391375266","https://openalex.org/W2748952813","https://openalex.org/W2475116013","https://openalex.org/W2770018148","https://openalex.org/W2358308169","https://openalex.org/W2385135707","https://openalex.org/W2140315382","https://openalex.org/W2059109728","https://openalex.org/W322691623","https://openalex.org/W1429949169"],"abstract_inverted_index":{"Sparse":[0],"matrix-matrix":[1],"multiplication":[2],"(SpMM)":[3],"and":[4,18,101,137,171,211,232],"Generalized":[5],"SpMM":[6],"(SpGEMM)":[7],"are":[8],"essential":[9],"computational":[10,138],"kernels":[11],"in":[12,215],"domains,":[13],"such":[14],"as":[15,28],"graph":[16],"analytics":[17],"scientific":[19],"computation.":[20],"While":[21],"systolic":[22,86,129],"arrays":[23,87],"have":[24,117],"traditionally":[25],"been":[26],"employed":[27],"specialized":[29],"architectures":[30],"for":[31,85,119],"complex":[32],"computing":[33],"problems":[34],"like":[35],"matrix":[36,91,103],"multiplication,":[37],"they":[38],"exhibit":[39],"inefficiency":[40,47],"when":[41],"dealing":[42],"with":[43,148],"sparse":[44,90,113,152],"matrices.":[45],"This":[46],"arises":[48],"from":[49],"the":[50,67,107,112,120,168,227],"unnecessary":[51],"operations":[52],"performed":[53],"by":[54,105],"processing":[55],"elements":[56],"(PEs)":[57],"that":[58,79],"contain":[59],"zero-valued":[60,99],"entries,":[61],"which":[62,133],"do":[63],"not":[64],"contribute":[65],"to":[66,88,128,167],"final":[68],"result.":[69],"To":[70],"address":[71],"this":[72],"issue,":[73],"we":[74,116],"propose":[75],"SPSA,":[76],"a":[77,81,123,156,174,220],"framework":[78],"leverages":[80],"sparse-packing":[82],"algorithm":[83],"suitable":[84],"accelerate":[89],"computations.":[92],"Our":[93],"approach":[94],"achieves":[95],"significant":[96],"reduction":[97],"of":[98,111,193,223,229],"items":[100],"improves":[102],"density":[104],"packing":[106],"rows":[108],"or":[109],"columns":[110],"matrix.":[114],"Furthermore,":[115],"introduced":[118],"first":[121],"time":[122],"data":[124],"representation":[125],"format":[126],"tailored":[127],"arrays,":[130],"called":[131],"CSXD,":[132],"further":[134,172],"enhances":[135],"storage":[136,190],"efficiency.":[139],"Importantly,":[140],"our":[141],"adaptation":[142],"scheme":[143],"enables":[144],"acceleration":[145],"benefits":[146],"even":[147],"limited":[149],"resources.":[150],"Through":[151,201],"packing,":[153],"SPSA":[154,204,218],"achieved":[155,219],"<inline-formula":[157,175,194],"xmlns:mml=\"http://www.w3.org/1998/Math/MathML\"":[158,176,195],"xmlns:xlink=\"http://www.w3.org/1999/xlink\">":[159,177,196],"<tex-math":[160,178,197],"notation=\"LaTeX\">$5.2\\times":[161],"$":[162,180,199],"</tex-math></inline-formula>":[163,181],"performance":[164,221],"improvement":[165,192,222],"compared":[166],"dense":[169],"baseline,":[170],"reached":[173],"notation=\"LaTeX\">$6.4\\times":[179],"enhancement":[182],"via":[183],"CSXD.":[184],"Simultaneously,":[185],"CSXD":[186],"realized":[187],"an":[188],"average":[189],"efficiency":[191],"notation=\"LaTeX\">$15.0\\times":[198],"</tex-math></inline-formula>.":[200],"extensive":[202],"evaluations,":[203,217],"outperforms":[205],"previous":[206],"designs":[207],"on":[208],"CPU,":[209],"GPU,":[210],"ASIC":[212],"platforms.":[213],"Finally,":[214],"end-to-end":[216],"3.9":[224],"times":[225],"across":[226],"workloads":[228],"BERT,":[230],"VGG19,":[231],"ResNet50.":[233]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":1}],"updated_date":"2026-03-13T16:22:10.518609","created_date":"2025-10-10T00:00:00"}
