{"id":"https://openalex.org/W4416199190","doi":"https://doi.org/10.1145/3712285.3759849","title":"Bridging the Gap between Unstructured SpMM and Structured Sparse Tensor Cores","display_name":"Bridging the Gap between Unstructured SpMM and Structured Sparse Tensor Cores","publication_year":2025,"publication_date":"2025-11-12","ids":{"openalex":"https://openalex.org/W4416199190","doi":"https://doi.org/10.1145/3712285.3759849"},"language":null,"primary_location":{"id":"doi:10.1145/3712285.3759849","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3712285.3759849","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the International Conference for High Performance Computing, Networking, Storage and Analysis","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5107953160","display_name":"Y. Dong","orcid":"https://orcid.org/0009-0004-4635-1903"},"institutions":[{"id":"https://openalex.org/I47720641","display_name":"Huazhong University of Science and Technology","ror":"https://ror.org/00p991c53","country_code":"CN","type":"education","lineage":["https://openalex.org/I47720641"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Yukang Dong","raw_affiliation_strings":["Huazhong University of Science and Technology, Wuhan, China"],"raw_orcid":"https://orcid.org/0009-0004-4635-1903","affiliations":[{"raw_affiliation_string":"Huazhong University of Science and Technology, Wuhan, China","institution_ids":["https://openalex.org/I47720641"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5120747193","display_name":"Ziyuan Shen","orcid":"https://orcid.org/0009-0003-3572-0999"},"institutions":[{"id":"https://openalex.org/I47720641","display_name":"Huazhong University of Science and Technology","ror":"https://ror.org/00p991c53","country_code":"CN","type":"education","lineage":["https://openalex.org/I47720641"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Ziyuan Shen","raw_affiliation_strings":["Huazhong University of Science and Technology, Wuhan, China"],"raw_orcid":"https://orcid.org/0009-0003-3572-0999","affiliations":[{"raw_affiliation_string":"Huazhong University of Science and Technology, Wuhan, China","institution_ids":["https://openalex.org/I47720641"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5003231472","display_name":"Wenbin Jiang","orcid":"https://orcid.org/0000-0001-5628-8806"},"institutions":[{"id":"https://openalex.org/I47720641","display_name":"Huazhong University of Science and Technology","ror":"https://ror.org/00p991c53","country_code":"CN","type":"education","lineage":["https://openalex.org/I47720641"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Wenbin Jiang","raw_affiliation_strings":["Huazhong University of Science and Technology, Wuhan, China"],"raw_orcid":"https://orcid.org/0000-0001-5628-8806","affiliations":[{"raw_affiliation_string":"Huazhong University of Science and Technology, Wuhan, China","institution_ids":["https://openalex.org/I47720641"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5075773640","display_name":"Zhenghang Liu","orcid":"https://orcid.org/0009-0003-2055-5476"},"institutions":[{"id":"https://openalex.org/I47720641","display_name":"Huazhong University of Science and Technology","ror":"https://ror.org/00p991c53","country_code":"CN","type":"education","lineage":["https://openalex.org/I47720641"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Zhenghang Liu","raw_affiliation_strings":["Huazhong University of Science and Technology, Wuhan, China"],"raw_orcid":"https://orcid.org/0009-0003-2055-5476","affiliations":[{"raw_affiliation_string":"Huazhong University of Science and Technology, Wuhan, China","institution_ids":["https://openalex.org/I47720641"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Ye Xu","orcid":"https://orcid.org/0009-0004-3662-4226"},"institutions":[{"id":"https://openalex.org/I47720641","display_name":"Huazhong University of Science and Technology","ror":"https://ror.org/00p991c53","country_code":"CN","type":"education","lineage":["https://openalex.org/I47720641"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Ye Xu","raw_affiliation_strings":["Huazhong University of Science and Technology, Wuhan, China"],"raw_orcid":"https://orcid.org/0009-0004-3662-4226","affiliations":[{"raw_affiliation_string":"Huazhong University of Science and Technology, Wuhan, China","institution_ids":["https://openalex.org/I47720641"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5120496905","display_name":"Bingyi He","orcid":"https://orcid.org/0009-0008-6138-1958"},"institutions":[{"id":"https://openalex.org/I47720641","display_name":"Huazhong University of Science and Technology","ror":"https://ror.org/00p991c53","country_code":"CN","type":"education","lineage":["https://openalex.org/I47720641"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Bingyi He","raw_affiliation_strings":["Huazhong University of Science and Technology, Wuahn, China"],"raw_orcid":"https://orcid.org/0009-0008-6138-1958","affiliations":[{"raw_affiliation_string":"Huazhong University of Science and Technology, Wuahn, China","institution_ids":["https://openalex.org/I47720641"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5077094802","display_name":"Ran Zheng","orcid":"https://orcid.org/0000-0002-3058-7581"},"institutions":[{"id":"https://openalex.org/I47720641","display_name":"Huazhong University of Science and Technology","ror":"https://ror.org/00p991c53","country_code":"CN","type":"education","lineage":["https://openalex.org/I47720641"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Ran Zheng","raw_affiliation_strings":["Huazhong University of Science and Technology, Wuhan, China"],"raw_orcid":"https://orcid.org/0000-0002-3058-7581","affiliations":[{"raw_affiliation_string":"Huazhong University of Science and Technology, Wuhan, China","institution_ids":["https://openalex.org/I47720641"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5022262922","display_name":"Hai Jin","orcid":"https://orcid.org/0000-0002-3934-7605"},"institutions":[{"id":"https://openalex.org/I47720641","display_name":"Huazhong University of Science and Technology","ror":"https://ror.org/00p991c53","country_code":"CN","type":"education","lineage":["https://openalex.org/I47720641"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Hai Jin","raw_affiliation_strings":["Huazhong University of Science and Technology, Wuhan, China"],"raw_orcid":"https://orcid.org/0000-0002-3934-7605","affiliations":[{"raw_affiliation_string":"Huazhong University of Science and Technology, Wuhan, China","institution_ids":["https://openalex.org/I47720641"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":8,"corresponding_author_ids":["https://openalex.org/A5107953160"],"corresponding_institution_ids":["https://openalex.org/I47720641"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":1,"citation_normalized_percentile":{"value":0.36699229,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"645","last_page":"660"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.6025000214576721,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.6025000214576721,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12292","display_name":"Graph Theory and Algorithms","score":0.09880000352859497,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12303","display_name":"Tensor decomposition and applications","score":0.06530000269412994,"subfield":{"id":"https://openalex.org/subfields/2605","display_name":"Computational Mathematics"},"field":{"id":"https://openalex.org/fields/26","display_name":"Mathematics"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/sparse-matrix","display_name":"Sparse matrix","score":0.7123000025749207},{"id":"https://openalex.org/keywords/bridging","display_name":"Bridging (networking)","score":0.6344000101089478},{"id":"https://openalex.org/keywords/kernel","display_name":"Kernel (algebra)","score":0.5823000073432922},{"id":"https://openalex.org/keywords/speedup","display_name":"Speedup","score":0.5537999868392944},{"id":"https://openalex.org/keywords/tensor","display_name":"Tensor (intrinsic definition)","score":0.5491999983787537},{"id":"https://openalex.org/keywords/matrix-multiplication","display_name":"Matrix multiplication","score":0.4636000096797943},{"id":"https://openalex.org/keywords/computation","display_name":"Computation","score":0.4375999867916107},{"id":"https://openalex.org/keywords/exploit","display_name":"Exploit","score":0.42969998717308044},{"id":"https://openalex.org/keywords/block","display_name":"Block (permutation group theory)","score":0.4237000048160553},{"id":"https://openalex.org/keywords/matrix","display_name":"Matrix (chemical analysis)","score":0.40380001068115234}],"concepts":[{"id":"https://openalex.org/C56372850","wikidata":"https://www.wikidata.org/wiki/Q1050404","display_name":"Sparse matrix","level":3,"score":0.7123000025749207},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6428999900817871},{"id":"https://openalex.org/C174348530","wikidata":"https://www.wikidata.org/wiki/Q188635","display_name":"Bridging (networking)","level":2,"score":0.6344000101089478},{"id":"https://openalex.org/C74193536","wikidata":"https://www.wikidata.org/wiki/Q574844","display_name":"Kernel (algebra)","level":2,"score":0.5823000073432922},{"id":"https://openalex.org/C68339613","wikidata":"https://www.wikidata.org/wiki/Q1549489","display_name":"Speedup","level":2,"score":0.5537999868392944},{"id":"https://openalex.org/C155281189","wikidata":"https://www.wikidata.org/wiki/Q3518150","display_name":"Tensor (intrinsic definition)","level":2,"score":0.5491999983787537},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.48579999804496765},{"id":"https://openalex.org/C17349429","wikidata":"https://www.wikidata.org/wiki/Q1049914","display_name":"Matrix multiplication","level":3,"score":0.4636000096797943},{"id":"https://openalex.org/C80444323","wikidata":"https://www.wikidata.org/wiki/Q2878974","display_name":"Theoretical computer science","level":1,"score":0.4408000111579895},{"id":"https://openalex.org/C45374587","wikidata":"https://www.wikidata.org/wiki/Q12525525","display_name":"Computation","level":2,"score":0.4375999867916107},{"id":"https://openalex.org/C165696696","wikidata":"https://www.wikidata.org/wiki/Q11287","display_name":"Exploit","level":2,"score":0.42969998717308044},{"id":"https://openalex.org/C2777210771","wikidata":"https://www.wikidata.org/wiki/Q4927124","display_name":"Block (permutation group theory)","level":2,"score":0.4237000048160553},{"id":"https://openalex.org/C106487976","wikidata":"https://www.wikidata.org/wiki/Q685816","display_name":"Matrix (chemical analysis)","level":2,"score":0.40380001068115234},{"id":"https://openalex.org/C63479239","wikidata":"https://www.wikidata.org/wiki/Q7353546","display_name":"Robustness (evolution)","level":3,"score":0.3871000111103058},{"id":"https://openalex.org/C50630238","wikidata":"https://www.wikidata.org/wiki/Q971505","display_name":"General-purpose computing on graphics processing units","level":3,"score":0.38519999384880066},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.3652999997138977},{"id":"https://openalex.org/C459310","wikidata":"https://www.wikidata.org/wiki/Q117801","display_name":"Computational science","level":1,"score":0.35269999504089355},{"id":"https://openalex.org/C165064840","wikidata":"https://www.wikidata.org/wiki/Q1321061","display_name":"Matching (statistics)","level":2,"score":0.349700003862381},{"id":"https://openalex.org/C132525143","wikidata":"https://www.wikidata.org/wiki/Q141488","display_name":"Graph","level":2,"score":0.3240000009536743},{"id":"https://openalex.org/C42355184","wikidata":"https://www.wikidata.org/wiki/Q1361088","display_name":"Matrix decomposition","level":3,"score":0.32120001316070557},{"id":"https://openalex.org/C117896860","wikidata":"https://www.wikidata.org/wiki/Q11376","display_name":"Acceleration","level":2,"score":0.3174000084400177},{"id":"https://openalex.org/C162319229","wikidata":"https://www.wikidata.org/wiki/Q175263","display_name":"Data structure","level":2,"score":0.313400000333786},{"id":"https://openalex.org/C100776233","wikidata":"https://www.wikidata.org/wiki/Q2532492","display_name":"Bridge (graph theory)","level":2,"score":0.3050000071525574},{"id":"https://openalex.org/C2986737658","wikidata":"https://www.wikidata.org/wiki/Q30103009","display_name":"Tensor decomposition","level":3,"score":0.3046000003814697},{"id":"https://openalex.org/C88230418","wikidata":"https://www.wikidata.org/wiki/Q131476","display_name":"Graph theory","level":2,"score":0.2888000011444092},{"id":"https://openalex.org/C179799912","wikidata":"https://www.wikidata.org/wiki/Q205084","display_name":"Computational complexity theory","level":2,"score":0.28859999775886536},{"id":"https://openalex.org/C78766204","wikidata":"https://www.wikidata.org/wiki/Q555032","display_name":"Multi-core processor","level":2,"score":0.287200003862381},{"id":"https://openalex.org/C100595998","wikidata":"https://www.wikidata.org/wiki/Q11731931","display_name":"Graph kernel","level":5,"score":0.28209999203681946},{"id":"https://openalex.org/C2778648169","wikidata":"https://www.wikidata.org/wiki/Q967768","display_name":"Compatibility (geochemistry)","level":2,"score":0.27950000762939453},{"id":"https://openalex.org/C54848796","wikidata":"https://www.wikidata.org/wiki/Q339011","display_name":"Symmetric matrix","level":3,"score":0.2736000120639801},{"id":"https://openalex.org/C51255310","wikidata":"https://www.wikidata.org/wiki/Q1163016","display_name":"Tensor product","level":2,"score":0.273499995470047},{"id":"https://openalex.org/C124066611","wikidata":"https://www.wikidata.org/wiki/Q28684319","display_name":"Sparse approximation","level":2,"score":0.2687000036239624},{"id":"https://openalex.org/C85817219","wikidata":"https://www.wikidata.org/wiki/Q884772","display_name":"Block matrix","level":3,"score":0.2623000144958496}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3712285.3759849","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3712285.3759849","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the International Conference for High Performance Computing, Networking, Storage and Analysis","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G10113166","display_name":null,"funder_award_id":"No.2022YFB4501400","funder_id":"https://openalex.org/F4320335777","funder_display_name":"National Key Research and Development Program of China"},{"id":"https://openalex.org/G3912833078","display_name":null,"funder_award_id":"No.62372199","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"},{"id":"https://openalex.org/F4320335777","display_name":"National Key Research and Development Program of China","ror":null}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":25,"referenced_works":["https://openalex.org/W1484040084","https://openalex.org/W2035080386","https://openalex.org/W2080090262","https://openalex.org/W2145085734","https://openalex.org/W2194775991","https://openalex.org/W2908664940","https://openalex.org/W2980186997","https://openalex.org/W2999347085","https://openalex.org/W3043303806","https://openalex.org/W3104045073","https://openalex.org/W3201937876","https://openalex.org/W4321636758","https://openalex.org/W4327911434","https://openalex.org/W4381158108","https://openalex.org/W4384705403","https://openalex.org/W4388667495","https://openalex.org/W4389386944","https://openalex.org/W4391986945","https://openalex.org/W4400409880","https://openalex.org/W4401018581","https://openalex.org/W4401090539","https://openalex.org/W4401408710","https://openalex.org/W4404417996","https://openalex.org/W4405755887","https://openalex.org/W4406617380"],"related_works":[],"abstract_inverted_index":{"The":[0],"acceleration":[1],"of":[2,28,42,56,119,140],"Sparse-dense":[3],"Matrix":[4],"Multiplication":[5],"(SpMM)":[6],"using":[7],"Tensor":[8,46],"Cores":[9,47],"(TCs)":[10],"in":[11,35,98],"GPUs":[12,125],"has":[13],"recently":[14],"garnered":[15],"significant":[16],"attention.":[17],"TCs":[18],"are":[19,61],"designed":[20],"for":[21],"block-wise":[22],"matrix":[23],"multiplication,":[24],"however,":[25,59],"block":[26],"partitioning":[27],"general":[29,85],"unstructured":[30],"sparse":[31,86],"matrices":[32,87],"often":[33],"results":[34],"low-level":[36],"density,":[37],"causing":[38],"a":[39,77,104,109],"substantial":[40],"waste":[41],"computational":[43],"resources.":[44],"Sparse":[45],"(SpTCs)":[48],"can":[49],"mitigate":[50],"this":[51,72],"issue":[52],"by":[53],"skipping":[54],"50%":[55],"zero":[57],"values,":[58],"SpTCs":[60],"limited":[62],"to":[63,144,151],"strict":[64],"2:4":[65],"or":[66],"1:2":[67],"structured":[68,89],"sparsity.":[69],"To":[70],"bridge":[71],"gap,":[73],"we":[74,102],"propose":[75],"MP-SpMM,":[76],"novel":[78,105],"Matching":[79],"and":[80,108,134,147],"Padding":[81],"approach":[82],"that":[83,114,127],"transforms":[84],"into":[88],"sparsity,":[90],"drawing":[91],"inspiration":[92],"from":[93],"the":[94,117],"maximum":[95],"matching":[96],"problem":[97],"graph":[99],"theory.":[100],"Moreover,":[101],"introduce":[103],"storage":[106],"format":[107],"highly":[110],"optimized":[111],"GPU":[112],"kernel":[113],"fully":[115],"exploits":[116],"capabilities":[118],"SpTCs.":[120],"Extensive":[121],"experiments":[122],"on":[123],"modern":[124],"demonstrate":[126],"MP-SpMM":[128],"outperforms":[129],"state-of-the-art":[130],"SpMM":[131],"libraries,":[132],"DTC-SpMM":[133],"RoDe,":[135],"with":[136],"an":[137],"average":[138],"speedup":[139],"2.42":[141],"\u00d7":[142,149],"(up":[143,150],"7.65":[145],"\u00d7)":[146],"1.92":[148],"8.60":[152],"\u00d7).":[153]},"counts_by_year":[],"updated_date":"2025-11-28T07:35:41.568135","created_date":"2025-11-12T00:00:00"}
