{"id":"https://openalex.org/W4293084046","doi":"https://doi.org/10.1145/3543068","title":"ASA: <u>A</u> ccelerating <u>S</u> parse <u>A</u> ccumulation in Column-wise SpGEMM","display_name":"ASA: <u>A</u> ccelerating <u>S</u> parse <u>A</u> ccumulation in Column-wise SpGEMM","publication_year":2022,"publication_date":"2022-06-11","ids":{"openalex":"https://openalex.org/W4293084046","doi":"https://doi.org/10.1145/3543068"},"language":"en","primary_location":{"id":"doi:10.1145/3543068","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3543068","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3543068","source":{"id":"https://openalex.org/S26056741","display_name":"ACM Transactions on Architecture and Code Optimization","issn_l":"1544-3566","issn":["1544-3566","1544-3973"],"is_oa":true,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM Transactions on Architecture and Code Optimization","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"diamond","oa_url":"https://dl.acm.org/doi/pdf/10.1145/3543068","any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5035780660","display_name":"Chao Zhang","orcid":"https://orcid.org/0000-0002-7892-5113"},"institutions":[{"id":"https://openalex.org/I186143895","display_name":"Lehigh University","ror":"https://ror.org/012afjb06","country_code":"US","type":"education","lineage":["https://openalex.org/I186143895"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Chao Zhang","raw_affiliation_strings":["Lehigh University"],"affiliations":[{"raw_affiliation_string":"Lehigh University","institution_ids":["https://openalex.org/I186143895"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5029390021","display_name":"Maximilian Bremer","orcid":"https://orcid.org/0000-0002-5940-3432"},"institutions":[{"id":"https://openalex.org/I148283060","display_name":"Lawrence Berkeley National Laboratory","ror":"https://ror.org/02jbv0t02","country_code":"US","type":"facility","lineage":["https://openalex.org/I1330989302","https://openalex.org/I148283060","https://openalex.org/I39565521"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Maximilian Bremer","raw_affiliation_strings":["Lawrence Berkeley National Laboratory"],"affiliations":[{"raw_affiliation_string":"Lawrence Berkeley National Laboratory","institution_ids":["https://openalex.org/I148283060"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5005140361","display_name":"Cy Chan","orcid":"https://orcid.org/0000-0001-6881-827X"},"institutions":[{"id":"https://openalex.org/I148283060","display_name":"Lawrence Berkeley National Laboratory","ror":"https://ror.org/02jbv0t02","country_code":"US","type":"facility","lineage":["https://openalex.org/I1330989302","https://openalex.org/I148283060","https://openalex.org/I39565521"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Cy Chan","raw_affiliation_strings":["Lawrence Berkeley National Laboratory"],"affiliations":[{"raw_affiliation_string":"Lawrence Berkeley National Laboratory","institution_ids":["https://openalex.org/I148283060"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5010873686","display_name":"John Shalf","orcid":null},"institutions":[{"id":"https://openalex.org/I148283060","display_name":"Lawrence Berkeley National Laboratory","ror":"https://ror.org/02jbv0t02","country_code":"US","type":"facility","lineage":["https://openalex.org/I1330989302","https://openalex.org/I148283060","https://openalex.org/I39565521"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"John Shalf","raw_affiliation_strings":["Lawrence Berkeley National Laboratory"],"affiliations":[{"raw_affiliation_string":"Lawrence Berkeley National Laboratory","institution_ids":["https://openalex.org/I148283060"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5028757963","display_name":"Xiaochen Guo","orcid":"https://orcid.org/0000-0001-7704-0412"},"institutions":[{"id":"https://openalex.org/I186143895","display_name":"Lehigh University","ror":"https://ror.org/012afjb06","country_code":"US","type":"education","lineage":["https://openalex.org/I186143895"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Xiaochen Guo","raw_affiliation_strings":["Lehigh University"],"affiliations":[{"raw_affiliation_string":"Lehigh University","institution_ids":["https://openalex.org/I186143895"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5035780660"],"corresponding_institution_ids":["https://openalex.org/I186143895"],"apc_list":null,"apc_paid":null,"fwci":1.3911,"has_fulltext":true,"cited_by_count":6,"citation_normalized_percentile":{"value":0.79728507,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":97},"biblio":{"volume":"19","issue":"4","first_page":"1","last_page":"24"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.9990000128746033,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.9990000128746033,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12292","display_name":"Graph Theory and Algorithms","score":0.994700014591217,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11478","display_name":"Caching and Content Delivery","score":0.9919999837875366,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7716749906539917},{"id":"https://openalex.org/keywords/hash-function","display_name":"Hash function","score":0.6734031438827515},{"id":"https://openalex.org/keywords/bottleneck","display_name":"Bottleneck","score":0.5935094356536865},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.582653820514679},{"id":"https://openalex.org/keywords/column","display_name":"Column (typography)","score":0.5011961460113525},{"id":"https://openalex.org/keywords/hash-table","display_name":"Hash table","score":0.4834926426410675},{"id":"https://openalex.org/keywords/linear-algebra","display_name":"Linear algebra","score":0.4313645362854004},{"id":"https://openalex.org/keywords/matrix-multiplication","display_name":"Matrix multiplication","score":0.42582160234451294},{"id":"https://openalex.org/keywords/sparse-matrix","display_name":"Sparse matrix","score":0.41711658239364624},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.4071623682975769},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.16237705945968628},{"id":"https://openalex.org/keywords/embedded-system","display_name":"Embedded system","score":0.11116236448287964}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7716749906539917},{"id":"https://openalex.org/C99138194","wikidata":"https://www.wikidata.org/wiki/Q183427","display_name":"Hash function","level":2,"score":0.6734031438827515},{"id":"https://openalex.org/C2780513914","wikidata":"https://www.wikidata.org/wiki/Q18210350","display_name":"Bottleneck","level":2,"score":0.5935094356536865},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.582653820514679},{"id":"https://openalex.org/C2780551164","wikidata":"https://www.wikidata.org/wiki/Q2306599","display_name":"Column (typography)","level":3,"score":0.5011961460113525},{"id":"https://openalex.org/C67388219","wikidata":"https://www.wikidata.org/wiki/Q207440","display_name":"Hash table","level":3,"score":0.4834926426410675},{"id":"https://openalex.org/C139352143","wikidata":"https://www.wikidata.org/wiki/Q82571","display_name":"Linear algebra","level":2,"score":0.4313645362854004},{"id":"https://openalex.org/C17349429","wikidata":"https://www.wikidata.org/wiki/Q1049914","display_name":"Matrix multiplication","level":3,"score":0.42582160234451294},{"id":"https://openalex.org/C56372850","wikidata":"https://www.wikidata.org/wiki/Q1050404","display_name":"Sparse matrix","level":3,"score":0.41711658239364624},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.4071623682975769},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.16237705945968628},{"id":"https://openalex.org/C149635348","wikidata":"https://www.wikidata.org/wiki/Q193040","display_name":"Embedded system","level":1,"score":0.11116236448287964},{"id":"https://openalex.org/C38652104","wikidata":"https://www.wikidata.org/wiki/Q3510521","display_name":"Computer security","level":1,"score":0.0},{"id":"https://openalex.org/C84114770","wikidata":"https://www.wikidata.org/wiki/Q46344","display_name":"Quantum","level":2,"score":0.0},{"id":"https://openalex.org/C163716315","wikidata":"https://www.wikidata.org/wiki/Q901177","display_name":"Gaussian","level":2,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C2524010","wikidata":"https://www.wikidata.org/wiki/Q8087","display_name":"Geometry","level":1,"score":0.0},{"id":"https://openalex.org/C126042441","wikidata":"https://www.wikidata.org/wiki/Q1324888","display_name":"Frame (networking)","level":2,"score":0.0},{"id":"https://openalex.org/C76155785","wikidata":"https://www.wikidata.org/wiki/Q418","display_name":"Telecommunications","level":1,"score":0.0},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3543068","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3543068","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3543068","source":{"id":"https://openalex.org/S26056741","display_name":"ACM Transactions on Architecture and Code Optimization","issn_l":"1544-3566","issn":["1544-3566","1544-3973"],"is_oa":true,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM Transactions on Architecture and Code Optimization","raw_type":"journal-article"}],"best_oa_location":{"id":"doi:10.1145/3543068","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3543068","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3543068","source":{"id":"https://openalex.org/S26056741","display_name":"ACM Transactions on Architecture and Code Optimization","issn_l":"1544-3566","issn":["1544-3566","1544-3973"],"is_oa":true,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM Transactions on Architecture and Code Optimization","raw_type":"journal-article"},"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/4","display_name":"Quality Education","score":0.7300000190734863}],"awards":[],"funders":[],"has_content":{"pdf":true,"grobid_xml":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4293084046.pdf","grobid_xml":"https://content.openalex.org/works/W4293084046.grobid-xml"},"referenced_works_count":56,"referenced_works":["https://openalex.org/W283324283","https://openalex.org/W1133087070","https://openalex.org/W1884140786","https://openalex.org/W1973262089","https://openalex.org/W1990832096","https://openalex.org/W1991173951","https://openalex.org/W2007259720","https://openalex.org/W2009654791","https://openalex.org/W2024818089","https://openalex.org/W2048275941","https://openalex.org/W2065607954","https://openalex.org/W2080353445","https://openalex.org/W2089437293","https://openalex.org/W2121564430","https://openalex.org/W2134237243","https://openalex.org/W2134633067","https://openalex.org/W2141380216","https://openalex.org/W2142422634","https://openalex.org/W2156125289","https://openalex.org/W2159094788","https://openalex.org/W2170382128","https://openalex.org/W2234355962","https://openalex.org/W2235832317","https://openalex.org/W2469279958","https://openalex.org/W2725159389","https://openalex.org/W2748328505","https://openalex.org/W2781869913","https://openalex.org/W2794952988","https://openalex.org/W2899486927","https://openalex.org/W2964336816","https://openalex.org/W2979350933","https://openalex.org/W2979707853","https://openalex.org/W2979747168","https://openalex.org/W2980113464","https://openalex.org/W2980119420","https://openalex.org/W2989899068","https://openalex.org/W3016542674","https://openalex.org/W3016832937","https://openalex.org/W3016842236","https://openalex.org/W3041191963","https://openalex.org/W3043796721","https://openalex.org/W3105937213","https://openalex.org/W3106161546","https://openalex.org/W3117042881","https://openalex.org/W3152508978","https://openalex.org/W3155922894","https://openalex.org/W3158831985","https://openalex.org/W3160646870","https://openalex.org/W3173260040","https://openalex.org/W3216997764","https://openalex.org/W4200378788","https://openalex.org/W4233714390","https://openalex.org/W4245923077","https://openalex.org/W4296246463","https://openalex.org/W6638337918","https://openalex.org/W6704139216"],"related_works":["https://openalex.org/W2529540995","https://openalex.org/W2001175489","https://openalex.org/W2027790231","https://openalex.org/W4312354936","https://openalex.org/W1993704253","https://openalex.org/W2144511445","https://openalex.org/W2083601972","https://openalex.org/W2410461480","https://openalex.org/W2796095821","https://openalex.org/W2079736157"],"abstract_inverted_index":{"Sparse":[0],"linear":[1,161],"algebra":[2],"is":[3,70,136],"an":[4,42,170,267,301],"important":[5],"kernel":[6],"in":[7,54,87,178,220,235,306],"many":[8],"different":[9,39,46],"applications.":[10],"Among":[11],"various":[12],"sparse":[13,50],"general":[14],"matrix-matrix":[15],"multiplication":[16],"(SpGEMM)":[17],"algorithms,":[18],"Gustavson\u2019s":[19],"column-wise":[20,55],"SpGEMM":[21,288,308],"has":[22],"good":[23],"locality":[24],"when":[25],"reading":[26],"input":[27],"matrix":[28,44],"and":[29,155,163,207,233,254,271,286],"can":[30],"be":[31],"easily":[32],"parallelized":[33],"by":[34,66,200],"distributing":[35],"the":[36,49,64,67,88,93,97,108,112,131,139,142,151,174,182,192,196,203,231,242,257,277,307],"computation":[37],"of":[38,41,63,100,133,141,173,176,181,198,246,259,269,281,303],"columns":[40],"output":[43,183],"to":[45,96,110,127,149,165,190,216,229,250,276,293],"processors.":[47],"However,":[48],"accumulation":[51,132,234],"(SPA)":[52],"step":[53],"SpGEMM,":[56],"which":[57,90,145],"merges":[58],"partial":[59,84,134,204],"sums":[60],"from":[61],"each":[62,179],"multiplications":[65],"row":[68],"indices,":[69],"still":[71],"a":[72,80,124,210,225,236,247,263,282,294],"performance":[73],"bottleneck.":[74],"The":[75],"state-of-the-art":[76,278,295],"software":[77,279],"implementation":[78,280],"uses":[79],"hash":[81,115,143,152,157,221],"table":[82],"for":[83,123],"sum":[85,135,205],"search":[86,162,206,232,244,252],"SPA,":[89],"makes":[91,146],"SPA":[92,109,199],"largest":[94],"contributor":[95],"execution":[98],"time":[99],"SpGEMM.":[101],"There":[102],"are":[103,121],"three":[104],"reasons":[105],"that":[106,120],"cause":[107],"become":[111],"bottleneck:":[113],"(1)":[114,201],"probing":[116,153],"requires":[117,159],"data-dependent":[118,218],"branches":[119,219],"difficult":[122,148],"branch":[125],"predictor":[126],"predict":[128],"correctly;":[129],"(2)":[130,223],"dependent":[137],"on":[138,241],"results":[140],"probing,":[144,222],"it":[147],"hide":[150],"latency;":[154],"(3)":[156,239],"collision":[158],"time-consuming":[160],"optimizations":[164],"reduce":[166,251],"these":[167],"collisions":[168],"require":[169],"accurate":[171],"estimation":[172],"number":[175],"non-zeros":[177],"column":[180],"matrix.":[184],"This":[185],"work":[186],"proposes":[187],"ASA":[188,194,265,299],"architecture":[189],"accelerate":[191],"SPA.":[193],"overcomes":[195],"challenges":[197],"executing":[202],"accumulate":[208],"with":[209],"single":[211],"instruction":[212],"through":[213],"ISA":[214],"extension":[215],"eliminate":[217],"using":[224],"dedicated":[226],"on-chip":[227],"cache":[228,249],"perform":[230],"pipelined":[237],"fashion,":[238],"relying":[240],"parallel":[243],"capability":[245],"set-associative":[248],"latency,":[253],"(4)":[255],"delaying":[256],"merging":[258],"overflowed":[260],"entries.":[261],"As":[262,291],"result,":[264],"achieves":[266,300],"average":[268,302],"2.25\u00d7":[270],"5.05\u00d7":[272],"speedup":[273,305],"as":[274],"compared":[275,292],"Markov":[283],"clustering":[284],"application":[285],"its":[287],"kernel,":[289],"respectively.":[290],"hashing":[296],"accelerator":[297],"design,":[298],"1.95\u00d7":[304],"kernel.":[309]},"counts_by_year":[{"year":2025,"cited_by_count":3},{"year":2024,"cited_by_count":1},{"year":2023,"cited_by_count":2}],"updated_date":"2026-03-27T05:58:40.876381","created_date":"2025-10-10T00:00:00"}
