{"id":"https://openalex.org/W4416199122","doi":"https://doi.org/10.1145/3712285.3759826","title":"StraGCN: GPU-Accelerated Strassen\u2019s Sparse-Dense Matrix Multiplication for Graph Convolutional Network Training","display_name":"StraGCN: GPU-Accelerated Strassen\u2019s Sparse-Dense Matrix Multiplication for Graph Convolutional Network Training","publication_year":2025,"publication_date":"2025-11-12","ids":{"openalex":"https://openalex.org/W4416199122","doi":"https://doi.org/10.1145/3712285.3759826"},"language":null,"primary_location":{"id":"doi:10.1145/3712285.3759826","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3712285.3759826","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the International Conference for High Performance Computing, Networking, Storage and Analysis","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5028551625","display_name":"Weidong He","orcid":"https://orcid.org/0009-0009-7058-3719"},"institutions":[{"id":"https://openalex.org/I47720641","display_name":"Huazhong University of Science and Technology","ror":"https://ror.org/00p991c53","country_code":"CN","type":"education","lineage":["https://openalex.org/I47720641"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Weidong He","raw_affiliation_strings":["Huazhong University of Science and Technology, Wuhan, China"],"affiliations":[{"raw_affiliation_string":"Huazhong University of Science and Technology, Wuhan, China","institution_ids":["https://openalex.org/I47720641"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5033955608","display_name":"Haikun Liu","orcid":"https://orcid.org/0000-0003-4290-1408"},"institutions":[{"id":"https://openalex.org/I47720641","display_name":"Huazhong University of Science and Technology","ror":"https://ror.org/00p991c53","country_code":"CN","type":"education","lineage":["https://openalex.org/I47720641"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Haikun Liu","raw_affiliation_strings":["Huazhong University of Science and Technology, Wuhan, China"],"affiliations":[{"raw_affiliation_string":"Huazhong University of Science and Technology, Wuhan, China","institution_ids":["https://openalex.org/I47720641"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5068662280","display_name":"Zhuohui Duan","orcid":"https://orcid.org/0000-0002-3950-3209"},"institutions":[{"id":"https://openalex.org/I47720641","display_name":"Huazhong University of Science and Technology","ror":"https://ror.org/00p991c53","country_code":"CN","type":"education","lineage":["https://openalex.org/I47720641"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Zhuohui Duan","raw_affiliation_strings":["Huazhong University of Science and Technology, Wuhan, China"],"affiliations":[{"raw_affiliation_string":"Huazhong University of Science and Technology, Wuhan, China","institution_ids":["https://openalex.org/I47720641"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5022398389","display_name":"Xiaofei Liao","orcid":"https://orcid.org/0000-0001-6302-813X"},"institutions":[{"id":"https://openalex.org/I47720641","display_name":"Huazhong University of Science and Technology","ror":"https://ror.org/00p991c53","country_code":"CN","type":"education","lineage":["https://openalex.org/I47720641"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xiaofei Liao","raw_affiliation_strings":["Huazhong University of Science and Technology, Wuhan, China"],"affiliations":[{"raw_affiliation_string":"Huazhong University of Science and Technology, Wuhan, China","institution_ids":["https://openalex.org/I47720641"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100696504","display_name":"Shuhao Zhang","orcid":"https://orcid.org/0000-0002-9927-6925"},"institutions":[{"id":"https://openalex.org/I47720641","display_name":"Huazhong University of Science and Technology","ror":"https://ror.org/00p991c53","country_code":"CN","type":"education","lineage":["https://openalex.org/I47720641"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Shuhao Zhang","raw_affiliation_strings":["Huazhong University of Science and Technology, Wuhan, China"],"affiliations":[{"raw_affiliation_string":"Huazhong University of Science and Technology, Wuhan, China","institution_ids":["https://openalex.org/I47720641"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5035078935","display_name":"Fubing Mao","orcid":"https://orcid.org/0000-0003-2589-0073"},"institutions":[{"id":"https://openalex.org/I47720641","display_name":"Huazhong University of Science and Technology","ror":"https://ror.org/00p991c53","country_code":"CN","type":"education","lineage":["https://openalex.org/I47720641"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Fubing Mao","raw_affiliation_strings":["Huazhong University of Science and Technology, Wuhan, China"],"affiliations":[{"raw_affiliation_string":"Huazhong University of Science and Technology, Wuhan, China","institution_ids":["https://openalex.org/I47720641"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5022262922","display_name":"Hai Jin","orcid":"https://orcid.org/0000-0002-3934-7605"},"institutions":[{"id":"https://openalex.org/I47720641","display_name":"Huazhong University of Science and Technology","ror":"https://ror.org/00p991c53","country_code":"CN","type":"education","lineage":["https://openalex.org/I47720641"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Hai Jin","raw_affiliation_strings":["Huazhong University of Science and Technology, Wuhan, China"],"affiliations":[{"raw_affiliation_string":"Huazhong University of Science and Technology, Wuhan, China","institution_ids":["https://openalex.org/I47720641"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":7,"corresponding_author_ids":["https://openalex.org/A5028551625"],"corresponding_institution_ids":["https://openalex.org/I47720641"],"apc_list":null,"apc_paid":null,"fwci":2.4849,"has_fulltext":false,"cited_by_count":1,"citation_normalized_percentile":{"value":0.92200015,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":91,"max":95},"biblio":{"volume":null,"issue":null,"first_page":"631","last_page":"644"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11273","display_name":"Advanced Graph Neural Networks","score":0.5099999904632568,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11273","display_name":"Advanced Graph Neural Networks","score":0.5099999904632568,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12292","display_name":"Graph Theory and Algorithms","score":0.2395000010728836,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T14347","display_name":"Big Data and Digital Economy","score":0.04769999906420708,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/adjacency-matrix","display_name":"Adjacency matrix","score":0.5618000030517578},{"id":"https://openalex.org/keywords/matrix-multiplication","display_name":"Matrix multiplication","score":0.5224000215530396},{"id":"https://openalex.org/keywords/graph","display_name":"Graph","score":0.4498000144958496},{"id":"https://openalex.org/keywords/multiplication","display_name":"Multiplication (music)","score":0.430400013923645},{"id":"https://openalex.org/keywords/cuda","display_name":"CUDA","score":0.37549999356269836},{"id":"https://openalex.org/keywords/scheme","display_name":"Scheme (mathematics)","score":0.3709000051021576},{"id":"https://openalex.org/keywords/exploit","display_name":"Exploit","score":0.36230000853538513},{"id":"https://openalex.org/keywords/matrix-decomposition","display_name":"Matrix decomposition","score":0.3538999855518341},{"id":"https://openalex.org/keywords/matrix","display_name":"Matrix (chemical analysis)","score":0.3425000011920929},{"id":"https://openalex.org/keywords/weighting","display_name":"Weighting","score":0.34060001373291016}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7872999906539917},{"id":"https://openalex.org/C180356752","wikidata":"https://www.wikidata.org/wiki/Q727035","display_name":"Adjacency matrix","level":3,"score":0.5618000030517578},{"id":"https://openalex.org/C17349429","wikidata":"https://www.wikidata.org/wiki/Q1049914","display_name":"Matrix multiplication","level":3,"score":0.5224000215530396},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.47360000014305115},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.4571000039577484},{"id":"https://openalex.org/C132525143","wikidata":"https://www.wikidata.org/wiki/Q141488","display_name":"Graph","level":2,"score":0.4498000144958496},{"id":"https://openalex.org/C2780595030","wikidata":"https://www.wikidata.org/wiki/Q3860309","display_name":"Multiplication (music)","level":2,"score":0.430400013923645},{"id":"https://openalex.org/C80444323","wikidata":"https://www.wikidata.org/wiki/Q2878974","display_name":"Theoretical computer science","level":1,"score":0.4027999937534332},{"id":"https://openalex.org/C2778119891","wikidata":"https://www.wikidata.org/wiki/Q477690","display_name":"CUDA","level":2,"score":0.37549999356269836},{"id":"https://openalex.org/C77618280","wikidata":"https://www.wikidata.org/wiki/Q1155772","display_name":"Scheme (mathematics)","level":2,"score":0.3709000051021576},{"id":"https://openalex.org/C165696696","wikidata":"https://www.wikidata.org/wiki/Q11287","display_name":"Exploit","level":2,"score":0.36230000853538513},{"id":"https://openalex.org/C42355184","wikidata":"https://www.wikidata.org/wiki/Q1361088","display_name":"Matrix decomposition","level":3,"score":0.3538999855518341},{"id":"https://openalex.org/C106487976","wikidata":"https://www.wikidata.org/wiki/Q685816","display_name":"Matrix (chemical analysis)","level":2,"score":0.3425000011920929},{"id":"https://openalex.org/C183115368","wikidata":"https://www.wikidata.org/wiki/Q856577","display_name":"Weighting","level":2,"score":0.34060001373291016},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.3382999897003174},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.335099995136261},{"id":"https://openalex.org/C2779304628","wikidata":"https://www.wikidata.org/wiki/Q3503480","display_name":"Face (sociological concept)","level":2,"score":0.32260000705718994},{"id":"https://openalex.org/C51632099","wikidata":"https://www.wikidata.org/wiki/Q3985153","display_name":"Training set","level":2,"score":0.3222000002861023},{"id":"https://openalex.org/C81363708","wikidata":"https://www.wikidata.org/wiki/Q17084460","display_name":"Convolutional neural network","level":2,"score":0.3147999942302704},{"id":"https://openalex.org/C152124472","wikidata":"https://www.wikidata.org/wiki/Q1204361","display_name":"Redundancy (engineering)","level":2,"score":0.3091000020503998},{"id":"https://openalex.org/C45374587","wikidata":"https://www.wikidata.org/wiki/Q12525525","display_name":"Computation","level":2,"score":0.2985000014305115},{"id":"https://openalex.org/C179799912","wikidata":"https://www.wikidata.org/wiki/Q205084","display_name":"Computational complexity theory","level":2,"score":0.29840001463890076},{"id":"https://openalex.org/C206588197","wikidata":"https://www.wikidata.org/wiki/Q846574","display_name":"Reuse","level":2,"score":0.29600000381469727},{"id":"https://openalex.org/C2779808786","wikidata":"https://www.wikidata.org/wiki/Q6664603","display_name":"Locality","level":2,"score":0.28600001335144043},{"id":"https://openalex.org/C2778915421","wikidata":"https://www.wikidata.org/wiki/Q3643177","display_name":"Performance improvement","level":2,"score":0.2806999981403351},{"id":"https://openalex.org/C67186912","wikidata":"https://www.wikidata.org/wiki/Q367664","display_name":"Data modeling","level":2,"score":0.2766999900341034},{"id":"https://openalex.org/C48044578","wikidata":"https://www.wikidata.org/wiki/Q727490","display_name":"Scalability","level":2,"score":0.27250000834465027},{"id":"https://openalex.org/C48903430","wikidata":"https://www.wikidata.org/wiki/Q491370","display_name":"Graph partition","level":3,"score":0.26600000262260437},{"id":"https://openalex.org/C88230418","wikidata":"https://www.wikidata.org/wiki/Q131476","display_name":"Graph theory","level":2,"score":0.2648000121116638},{"id":"https://openalex.org/C106516650","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm design","level":2,"score":0.25859999656677246},{"id":"https://openalex.org/C2779089604","wikidata":"https://www.wikidata.org/wiki/Q7169333","display_name":"Permission","level":2,"score":0.25780001282691956},{"id":"https://openalex.org/C116409475","wikidata":"https://www.wikidata.org/wiki/Q1385056","display_name":"External Data Representation","level":2,"score":0.25440001487731934},{"id":"https://openalex.org/C103275481","wikidata":"https://www.wikidata.org/wiki/Q6787889","display_name":"Matrix representation","level":3,"score":0.2524000108242035}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3712285.3759826","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3712285.3759826","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the International Conference for High Performance Computing, Networking, Storage and Analysis","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":29,"referenced_works":["https://openalex.org/W1972907356","https://openalex.org/W1981419611","https://openalex.org/W2035476608","https://openalex.org/W2070232376","https://openalex.org/W2080063509","https://openalex.org/W2166604263","https://openalex.org/W2605316362","https://openalex.org/W2794952988","https://openalex.org/W2903262661","https://openalex.org/W2907492528","https://openalex.org/W3014127403","https://openalex.org/W3016542674","https://openalex.org/W3016832937","https://openalex.org/W3017228913","https://openalex.org/W3081031082","https://openalex.org/W3090369187","https://openalex.org/W3139769037","https://openalex.org/W3155922894","https://openalex.org/W3167254314","https://openalex.org/W3206743063","https://openalex.org/W4280509186","https://openalex.org/W4293024985","https://openalex.org/W4302010773","https://openalex.org/W4318540786","https://openalex.org/W4360831960","https://openalex.org/W4360831975","https://openalex.org/W4372267520","https://openalex.org/W4388662070","https://openalex.org/W4393407116"],"related_works":[],"abstract_inverted_index":{"Graph":[0],"Convolutional":[1],"Networks":[2],"(GCNs)":[3],"are":[4],"a":[5,17,55,108],"fundamental":[6],"approach":[7],"to":[8,24,65,94,113],"deep":[9],"learning":[10],"on":[11,44],"graph-structured":[12],"data.":[13],"However,":[14],"they":[15],"face":[16],"significant":[18],"challenge":[19],"in":[20,92],"training":[21],"efficiency":[22],"due":[23],"the":[25,38,66,86,89,119],"high":[26],"computational":[27],"cost":[28],"of":[29,76,88,122,140],"Sparse-Dense":[30],"Matrix":[31],"Multiplication":[32],"(SpMM).":[33],"This":[34],"paper":[35],"presents":[36],"StraGCN,":[37],"first":[39],"GPU-accelerated":[40],"SpMM":[41],"implementation":[42],"based":[43],"Strassen\u2019s":[45,81],"algorithm":[46],"particularly":[47],"designed":[48],"for":[49,59,80],"GCN":[50,151],"training.":[51],"First,":[52],"we":[53,106],"propose":[54,107],"horizontal":[56],"fusion":[57],"model":[58],"GPU":[60],"kernels":[61],"as":[62],"an":[63],"alternative":[64],"commonly":[67],"used":[68],"multi-stream":[69],"CUDA":[70],"model,":[71],"significantly":[72],"improving":[73],"data":[74],"locality":[75],"on-chip":[77],"shared":[78],"memory":[79],"SpMM.":[82],"Second,":[83],"StraGCN":[84,127,136],"exploits":[85],"immutability":[87],"adjacency":[90],"matrix":[91,110],"GCNs":[93],"reuse":[95],"intermediate":[96],"results":[97,133],"from":[98],"submatrix":[99],"operations,":[100],"substantially":[101],"reducing":[102],"redundant":[103],"computations.":[104],"Third,":[105],"two-stage":[109],"partitioning":[111],"scheme":[112],"mitigate":[114],"load":[115],"imbalance":[116],"caused":[117],"by":[118],"irregular":[120],"distribution":[121],"non-zero":[123],"elements.":[124],"We":[125],"evaluate":[126],"with":[128,149],"fifteen":[129],"benchmark":[130],"datasets.":[131],"Experimental":[132],"show":[134],"that":[135],"achieves":[137],"performance":[138],"speedups":[139],"2.1":[141],"\u00d7,":[142,144],"2.6":[143],"and":[145,154],"3.3":[146],"\u00d7":[147],"compared":[148],"state-of-the-art":[150],"frameworks\u2013GNNA,":[152],"PyG,":[153],"DGL,":[155],"respectively.":[156]},"counts_by_year":[{"year":2025,"cited_by_count":1}],"updated_date":"2026-03-27T05:58:40.876381","created_date":"2025-11-12T00:00:00"}
