{"id":"https://openalex.org/W4414693300","doi":"https://doi.org/10.1109/tpds.2025.3616981","title":"SSpMM: Efficiently <u>S</u> calable <u>SpMM</u> Kernels Across Multiple Generations of Tensor Cores","display_name":"SSpMM: Efficiently <u>S</u> calable <u>SpMM</u> Kernels Across Multiple Generations of Tensor Cores","publication_year":2025,"publication_date":"2025-10-01","ids":{"openalex":"https://openalex.org/W4414693300","doi":"https://doi.org/10.1109/tpds.2025.3616981"},"language":"en","primary_location":{"id":"doi:10.1109/tpds.2025.3616981","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tpds.2025.3616981","pdf_url":null,"source":{"id":"https://openalex.org/S97130795","display_name":"IEEE Transactions on Parallel and Distributed Systems","issn_l":"1045-9219","issn":["1045-9219","1558-2183","2161-9883"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Parallel and Distributed Systems","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5103112899","display_name":"Zeyu Xue","orcid":"https://orcid.org/0009-0007-6374-2916"},"institutions":[{"id":"https://openalex.org/I170215575","display_name":"National University of Defense Technology","ror":"https://ror.org/05d2yfz11","country_code":"CN","type":"education","lineage":["https://openalex.org/I170215575"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Zeyu Xue","raw_affiliation_strings":["College of Computer Science and Technology, National University of Defense Technology, Changsha, China","College of Computer Science and Technology, National University of Defense Technology, China"],"raw_orcid":"https://orcid.org/0009-0007-6374-2916","affiliations":[{"raw_affiliation_string":"College of Computer Science and Technology, National University of Defense Technology, Changsha, China","institution_ids":["https://openalex.org/I170215575"]},{"raw_affiliation_string":"College of Computer Science and Technology, National University of Defense Technology, China","institution_ids":["https://openalex.org/I170215575"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101937502","display_name":"Mei Wen","orcid":"https://orcid.org/0000-0002-5875-3297"},"institutions":[{"id":"https://openalex.org/I170215575","display_name":"National University of Defense Technology","ror":"https://ror.org/05d2yfz11","country_code":"CN","type":"education","lineage":["https://openalex.org/I170215575"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Mei Wen","raw_affiliation_strings":["College of Computer Science and Technology, National University of Defense Technology, Changsha, China","College of Computer Science and Technology, National University of Defense Technology, China"],"raw_orcid":"https://orcid.org/0000-0002-5875-3297","affiliations":[{"raw_affiliation_string":"College of Computer Science and Technology, National University of Defense Technology, Changsha, China","institution_ids":["https://openalex.org/I170215575"]},{"raw_affiliation_string":"College of Computer Science and Technology, National University of Defense Technology, China","institution_ids":["https://openalex.org/I170215575"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Jianchao Yang","orcid":"https://orcid.org/0000-0001-7016-7672"},"institutions":[{"id":"https://openalex.org/I170215575","display_name":"National University of Defense Technology","ror":"https://ror.org/05d2yfz11","country_code":"CN","type":"education","lineage":["https://openalex.org/I170215575"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jianchao Yang","raw_affiliation_strings":["College of Computer Science and Technology, National University of Defense Technology, Changsha, China","College of Computer Science and Technology, National University of Defense Technology, China"],"raw_orcid":"https://orcid.org/0000-0001-7016-7672","affiliations":[{"raw_affiliation_string":"College of Computer Science and Technology, National University of Defense Technology, Changsha, China","institution_ids":["https://openalex.org/I170215575"]},{"raw_affiliation_string":"College of Computer Science and Technology, National University of Defense Technology, China","institution_ids":["https://openalex.org/I170215575"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Minjin Tang","orcid":"https://orcid.org/0000-0001-7718-3085"},"institutions":[{"id":"https://openalex.org/I170215575","display_name":"National University of Defense Technology","ror":"https://ror.org/05d2yfz11","country_code":"CN","type":"education","lineage":["https://openalex.org/I170215575"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Minjin Tang","raw_affiliation_strings":["College of Computer Science and Technology, National University of Defense Technology, Changsha, China","College of Computer Science and Technology, National University of Defense Technology, China"],"raw_orcid":"https://orcid.org/0000-0001-7718-3085","affiliations":[{"raw_affiliation_string":"College of Computer Science and Technology, National University of Defense Technology, Changsha, China","institution_ids":["https://openalex.org/I170215575"]},{"raw_affiliation_string":"College of Computer Science and Technology, National University of Defense Technology, China","institution_ids":["https://openalex.org/I170215575"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5109667607","display_name":"Zhongdi Luo","orcid":null},"institutions":[{"id":"https://openalex.org/I170215575","display_name":"National University of Defense Technology","ror":"https://ror.org/05d2yfz11","country_code":"CN","type":"education","lineage":["https://openalex.org/I170215575"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Zhongdi Luo","raw_affiliation_strings":["College of Computer Science and Technology, National University of Defense Technology, Changsha, China","College of Computer Science and Technology, National University of Defense Technology, China"],"raw_orcid":"https://orcid.org/0009-0009-7120-9729","affiliations":[{"raw_affiliation_string":"College of Computer Science and Technology, National University of Defense Technology, Changsha, China","institution_ids":["https://openalex.org/I170215575"]},{"raw_affiliation_string":"College of Computer Science and Technology, National University of Defense Technology, China","institution_ids":["https://openalex.org/I170215575"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Jing Feng","orcid":"https://orcid.org/0009-0008-3854-5336"},"institutions":[{"id":"https://openalex.org/I170215575","display_name":"National University of Defense Technology","ror":"https://ror.org/05d2yfz11","country_code":"CN","type":"education","lineage":["https://openalex.org/I170215575"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jing Feng","raw_affiliation_strings":["College of Computer Science and Technology, National University of Defense Technology, Changsha, China","College of Computer Science and Technology, National University of Defense Technology, China"],"raw_orcid":"https://orcid.org/0009-0008-3854-5336","affiliations":[{"raw_affiliation_string":"College of Computer Science and Technology, National University of Defense Technology, Changsha, China","institution_ids":["https://openalex.org/I170215575"]},{"raw_affiliation_string":"College of Computer Science and Technology, National University of Defense Technology, China","institution_ids":["https://openalex.org/I170215575"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101825210","display_name":"Yang Shi","orcid":"https://orcid.org/0000-0001-5786-3171"},"institutions":[{"id":"https://openalex.org/I170215575","display_name":"National University of Defense Technology","ror":"https://ror.org/05d2yfz11","country_code":"CN","type":"education","lineage":["https://openalex.org/I170215575"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yang Shi","raw_affiliation_strings":["College of Computer Science and Technology, National University of Defense Technology, Changsha, China","College of Computer Science and Technology, National University of Defense Technology, China"],"raw_orcid":"https://orcid.org/0000-0001-5786-3171","affiliations":[{"raw_affiliation_string":"College of Computer Science and Technology, National University of Defense Technology, Changsha, China","institution_ids":["https://openalex.org/I170215575"]},{"raw_affiliation_string":"College of Computer Science and Technology, National University of Defense Technology, China","institution_ids":["https://openalex.org/I170215575"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101981758","display_name":"Zhaoyun Chen","orcid":"https://orcid.org/0000-0003-1552-8396"},"institutions":[{"id":"https://openalex.org/I170215575","display_name":"National University of Defense Technology","ror":"https://ror.org/05d2yfz11","country_code":"CN","type":"education","lineage":["https://openalex.org/I170215575"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Zhaoyun Chen","raw_affiliation_strings":["College of Computer Science and Technology, National University of Defense Technology, Changsha, China","College of Computer Science and Technology, National University of Defense Technology, China"],"raw_orcid":"https://orcid.org/0000-0003-1552-8396","affiliations":[{"raw_affiliation_string":"College of Computer Science and Technology, National University of Defense Technology, Changsha, China","institution_ids":["https://openalex.org/I170215575"]},{"raw_affiliation_string":"College of Computer Science and Technology, National University of Defense Technology, China","institution_ids":["https://openalex.org/I170215575"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5037428152","display_name":"Junzhong Shen","orcid":"https://orcid.org/0000-0001-6233-6800"},"institutions":[{"id":"https://openalex.org/I170215575","display_name":"National University of Defense Technology","ror":"https://ror.org/05d2yfz11","country_code":"CN","type":"education","lineage":["https://openalex.org/I170215575"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Junzhong Shen","raw_affiliation_strings":["College of Computer Science and Technology, National University of Defense Technology, Changsha, China","College of Computer Science and Technology, National University of Defense Technology, China"],"raw_orcid":"https://orcid.org/0000-0001-6233-6800","affiliations":[{"raw_affiliation_string":"College of Computer Science and Technology, National University of Defense Technology, Changsha, China","institution_ids":["https://openalex.org/I170215575"]},{"raw_affiliation_string":"College of Computer Science and Technology, National University of Defense Technology, China","institution_ids":["https://openalex.org/I170215575"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5087562583","display_name":"Johannes Langguth","orcid":"https://orcid.org/0000-0003-4200-511X"},"institutions":[{"id":"https://openalex.org/I2799829267","display_name":"Simula Research Laboratory","ror":"https://ror.org/00vn06n10","country_code":"NO","type":"facility","lineage":["https://openalex.org/I2799829267"]}],"countries":["NO"],"is_corresponding":false,"raw_author_name":"Johannes Langguth","raw_affiliation_strings":["Simula Research Laboratory, Oslo, Norway","Simula Research Laboratory, Norway"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Simula Research Laboratory, Oslo, Norway","institution_ids":["https://openalex.org/I2799829267"]},{"raw_affiliation_string":"Simula Research Laboratory, Norway","institution_ids":["https://openalex.org/I2799829267"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":10,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":1.1547,"has_fulltext":false,"cited_by_count":1,"citation_normalized_percentile":{"value":0.79846285,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":95,"max":98},"biblio":{"volume":"36","issue":"12","first_page":"2652","last_page":"2667"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12303","display_name":"Tensor decomposition and applications","score":0.9839000105857849,"subfield":{"id":"https://openalex.org/subfields/2605","display_name":"Computational Mathematics"},"field":{"id":"https://openalex.org/fields/26","display_name":"Mathematics"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T12303","display_name":"Tensor decomposition and applications","score":0.9839000105857849,"subfield":{"id":"https://openalex.org/subfields/2605","display_name":"Computational Mathematics"},"field":{"id":"https://openalex.org/fields/26","display_name":"Mathematics"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T13650","display_name":"Computational Physics and Python Applications","score":0.9625999927520752,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.9309999942779541,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/speedup","display_name":"Speedup","score":0.6848000288009644},{"id":"https://openalex.org/keywords/tensor","display_name":"Tensor (intrinsic definition)","score":0.645799994468689},{"id":"https://openalex.org/keywords/scalability","display_name":"Scalability","score":0.6187999844551086},{"id":"https://openalex.org/keywords/matrix-multiplication","display_name":"Matrix multiplication","score":0.5465999841690063},{"id":"https://openalex.org/keywords/multi-core-processor","display_name":"Multi-core processor","score":0.516700029373169},{"id":"https://openalex.org/keywords/sparse-matrix","display_name":"Sparse matrix","score":0.47510001063346863},{"id":"https://openalex.org/keywords/granularity","display_name":"Granularity","score":0.4717000126838684},{"id":"https://openalex.org/keywords/inference","display_name":"Inference","score":0.41600000858306885}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7462000250816345},{"id":"https://openalex.org/C68339613","wikidata":"https://www.wikidata.org/wiki/Q1549489","display_name":"Speedup","level":2,"score":0.6848000288009644},{"id":"https://openalex.org/C155281189","wikidata":"https://www.wikidata.org/wiki/Q3518150","display_name":"Tensor (intrinsic definition)","level":2,"score":0.645799994468689},{"id":"https://openalex.org/C48044578","wikidata":"https://www.wikidata.org/wiki/Q727490","display_name":"Scalability","level":2,"score":0.6187999844551086},{"id":"https://openalex.org/C17349429","wikidata":"https://www.wikidata.org/wiki/Q1049914","display_name":"Matrix multiplication","level":3,"score":0.5465999841690063},{"id":"https://openalex.org/C78766204","wikidata":"https://www.wikidata.org/wiki/Q555032","display_name":"Multi-core processor","level":2,"score":0.516700029373169},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.49709999561309814},{"id":"https://openalex.org/C56372850","wikidata":"https://www.wikidata.org/wiki/Q1050404","display_name":"Sparse matrix","level":3,"score":0.47510001063346863},{"id":"https://openalex.org/C177774035","wikidata":"https://www.wikidata.org/wiki/Q1246948","display_name":"Granularity","level":2,"score":0.4717000126838684},{"id":"https://openalex.org/C2776214188","wikidata":"https://www.wikidata.org/wiki/Q408386","display_name":"Inference","level":2,"score":0.41600000858306885},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.40220001339912415},{"id":"https://openalex.org/C80444323","wikidata":"https://www.wikidata.org/wiki/Q2878974","display_name":"Theoretical computer science","level":1,"score":0.3871000111103058},{"id":"https://openalex.org/C106516650","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm design","level":2,"score":0.3402999937534332},{"id":"https://openalex.org/C74193536","wikidata":"https://www.wikidata.org/wiki/Q574844","display_name":"Kernel (algebra)","level":2,"score":0.3384999930858612},{"id":"https://openalex.org/C63479239","wikidata":"https://www.wikidata.org/wiki/Q7353546","display_name":"Robustness (evolution)","level":3,"score":0.3382999897003174},{"id":"https://openalex.org/C39096654","wikidata":"https://www.wikidata.org/wiki/Q728507","display_name":"Strassen algorithm","level":4,"score":0.3327000141143799},{"id":"https://openalex.org/C124007464","wikidata":"https://www.wikidata.org/wiki/Q428091","display_name":"Tensor contraction","level":3,"score":0.3319000005722046},{"id":"https://openalex.org/C42355184","wikidata":"https://www.wikidata.org/wiki/Q1361088","display_name":"Matrix decomposition","level":3,"score":0.32710000872612},{"id":"https://openalex.org/C459310","wikidata":"https://www.wikidata.org/wiki/Q117801","display_name":"Computational science","level":1,"score":0.3149000108242035},{"id":"https://openalex.org/C166077713","wikidata":"https://www.wikidata.org/wiki/Q1758924","display_name":"Tensor field","level":3,"score":0.3001999855041504},{"id":"https://openalex.org/C188198153","wikidata":"https://www.wikidata.org/wiki/Q1613840","display_name":"Limiting","level":2,"score":0.2946000099182129},{"id":"https://openalex.org/C152124472","wikidata":"https://www.wikidata.org/wiki/Q1204361","display_name":"Redundancy (engineering)","level":2,"score":0.27799999713897705},{"id":"https://openalex.org/C2777749129","wikidata":"https://www.wikidata.org/wiki/Q17148469","display_name":"Robust principal component analysis","level":3,"score":0.2655999958515167},{"id":"https://openalex.org/C106487976","wikidata":"https://www.wikidata.org/wiki/Q685816","display_name":"Matrix (chemical analysis)","level":2,"score":0.2578999996185303}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/tpds.2025.3616981","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tpds.2025.3616981","pdf_url":null,"source":{"id":"https://openalex.org/S97130795","display_name":"IEEE Transactions on Parallel and Distributed Systems","issn_l":"1045-9219","issn":["1045-9219","1558-2183","2161-9883"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Parallel and Distributed Systems","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G6280195012","display_name":null,"funder_award_id":"2023JJ40679","funder_id":"https://openalex.org/F4320322866","funder_display_name":"Natural Science Foundation of Hainan Province"},{"id":"https://openalex.org/G7120436939","display_name":null,"funder_award_id":"2024JJ6470","funder_id":"https://openalex.org/F4320322866","funder_display_name":"Natural Science Foundation of Hainan Province"}],"funders":[{"id":"https://openalex.org/F4320322866","display_name":"Natural Science Foundation of Hainan Province","ror":"https://ror.org/01h0zpd94"}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":50,"referenced_works":["https://openalex.org/W2035080386","https://openalex.org/W2808128431","https://openalex.org/W2914631005","https://openalex.org/W2963363373","https://openalex.org/W2964337156","https://openalex.org/W2979310060","https://openalex.org/W2979691890","https://openalex.org/W2980186997","https://openalex.org/W2990029213","https://openalex.org/W2999347085","https://openalex.org/W3012561096","https://openalex.org/W3031930374","https://openalex.org/W3040646053","https://openalex.org/W3091170309","https://openalex.org/W3092640258","https://openalex.org/W3110597019","https://openalex.org/W3130660608","https://openalex.org/W3132616766","https://openalex.org/W3132695675","https://openalex.org/W3136309901","https://openalex.org/W3175189837","https://openalex.org/W3175878603","https://openalex.org/W3208099998","https://openalex.org/W4214711850","https://openalex.org/W4220912491","https://openalex.org/W4285104934","https://openalex.org/W4293023340","https://openalex.org/W4293024985","https://openalex.org/W4308083526","https://openalex.org/W4312771767","https://openalex.org/W4316252382","https://openalex.org/W4318328338","https://openalex.org/W4318969866","https://openalex.org/W4321636621","https://openalex.org/W4321636675","https://openalex.org/W4322576898","https://openalex.org/W4381748048","https://openalex.org/W4384705403","https://openalex.org/W4385245566","https://openalex.org/W4385270041","https://openalex.org/W4387682272","https://openalex.org/W4388661983","https://openalex.org/W4388667495","https://openalex.org/W4389576338","https://openalex.org/W4390097706","https://openalex.org/W4395106472","https://openalex.org/W4401211590","https://openalex.org/W4401408710","https://openalex.org/W4402079940","https://openalex.org/W4403295008"],"related_works":[],"abstract_inverted_index":{"Sparse-Dense":[0],"Matrix-Matrix":[1],"Multiplication":[2],"(SpMM)":[3],"has":[4],"emerged":[5],"as":[6],"a":[7,70,76,124,186],"foundational":[8],"primitive":[9],"in":[10,28,84,189],"HPC":[11],"and":[12,82,135,170],"AI.":[13],"Recent":[14],"advancements":[15],"have":[16,67,180],"aimed":[17],"to":[18,114,194],"accelerate":[19],"SpMM":[20,51,71,176],"by":[21],"harnessing":[22],"the":[23,60,94,110,174],"powerful":[24],"Tensor":[25,45,56,86,146],"Cores":[26,57],"found":[27],"modern":[29],"GPUs.":[30],"However,":[31,75],"despite":[32],"these":[33],"efforts,":[34],"existing":[35],"methods":[36],"frequently":[37],"encounter":[38],"performance":[39,83],"degradation":[40],"when":[41],"ported":[42],"across":[43,52],"different":[44],"Core":[46,87,147],"architectures.":[47],"Recognizing":[48],"that":[49,126,160],"scalable":[50,131],"multiple":[53],"generations":[54,144],"of":[55,63,145,166],"relies":[58],"on":[59,142],"effective":[61],"use":[62],"general-purpose":[64],"instructions,":[65],"we":[66,92,108,119,179],"meticulously":[68],"developed":[69],"library":[72],"named":[73],"SSpMM.":[74],"significant":[77],"conflict":[78],"exists":[79],"between":[80],"granularity":[81],"current":[85],"instructions.":[88,106],"To":[89],"resolve":[90],"this,":[91],"introduce":[93,120],"innovative":[95],"Transpose":[96],"Mapping":[97],"Scheme,":[98],"which":[99],"elegantly":[100],"implements":[101],"fine-grained":[102],"kernels":[103,129],"using":[104,149],"coarse-grained":[105],"Additionally,":[107],"propose":[109],"Register":[111],"Shuffle":[112],"Method":[113],"further":[115],"enhance":[116],"performance.":[117],"Finally,":[118],"Sparse":[121],"Vector":[122],"Compression,":[123],"technique":[125],"ensures":[127],"our":[128],"are":[130],"with":[132],"both":[133],"structured":[134],"unstructured":[136],"sparsity.":[137],"Our":[138],"experimental":[139],"results,":[140],"conducted":[141],"four":[143],"GPUs":[148],"over":[150,173],"3,000":[151],"sparse":[152],"matrices":[153],"from":[154],"well":[155],"established":[156],"matrix":[157],"collections,":[158],"demonstrate":[159],"SSpMM":[161,182],"achieves":[162],"an":[163],"average":[164],"speedup":[165,188],"2.04\u00d7,":[167],"2.81\u00d7,":[168],"2.07\u00d7,":[169],"1.87\u00d7,":[171],"respectively,":[172],"state-of-the-art":[175],"solution.":[177],"Furthermore,":[178],"integrated":[181],"into":[183],"PyTorch,":[184],"achieving":[185],"1.81\u00d7":[187],"end-to-end":[190],"Transformer":[191],"inference":[192],"compared":[193],"cuDNN.":[195]},"counts_by_year":[{"year":2026,"cited_by_count":1}],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2025-10-10T00:00:00"}
