{"id":"https://openalex.org/W4404417996","doi":"https://doi.org/10.1145/3702001","title":"RT-GNN: Accelerating Sparse Graph Neural Networks by Tensor-CUDA Kernel Fusion","display_name":"RT-GNN: Accelerating Sparse Graph Neural Networks by Tensor-CUDA Kernel Fusion","publication_year":2024,"publication_date":"2024-11-15","ids":{"openalex":"https://openalex.org/W4404417996","doi":"https://doi.org/10.1145/3702001"},"language":"en","primary_location":{"id":"doi:10.1145/3702001","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3702001","pdf_url":null,"source":{"id":"https://openalex.org/S26056741","display_name":"ACM Transactions on Architecture and Code Optimization","issn_l":"1544-3566","issn":["1544-3566","1544-3973"],"is_oa":true,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM Transactions on Architecture and Code Optimization","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"diamond","oa_url":"https://doi.org/10.1145/3702001","any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5103275408","display_name":"Jianrong Yan","orcid":"https://orcid.org/0000-0002-6777-7899"},"institutions":[{"id":"https://openalex.org/I47720641","display_name":"Huazhong University of Science and Technology","ror":"https://ror.org/00p991c53","country_code":"CN","type":"education","lineage":["https://openalex.org/I47720641"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Jianrong Yan","raw_affiliation_strings":["National Engineering Research Center for Big Data Technology and System, Service Computing Technology and System Lab, Cluster and Grid Computing Lab, School of Computer Science and Technology, Huazhong University of Science and Technology, Wuhan, China"],"raw_orcid":"https://orcid.org/0000-0002-6777-7899","affiliations":[{"raw_affiliation_string":"National Engineering Research Center for Big Data Technology and System, Service Computing Technology and System Lab, Cluster and Grid Computing Lab, School of Computer Science and Technology, Huazhong University of Science and Technology, Wuhan, China","institution_ids":["https://openalex.org/I47720641"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5003231472","display_name":"Wenbin Jiang","orcid":"https://orcid.org/0000-0001-5628-8806"},"institutions":[{"id":"https://openalex.org/I47720641","display_name":"Huazhong University of Science and Technology","ror":"https://ror.org/00p991c53","country_code":"CN","type":"education","lineage":["https://openalex.org/I47720641"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Wenbin Jiang","raw_affiliation_strings":["National Engineering Research Center for Big Data Technology and System, Service Computing Technology and System Lab, Cluster and Grid Computing Lab, School of Computer Science and Technology, Huazhong University of Science and Technology, Wuhan, China"],"raw_orcid":"https://orcid.org/0000-0001-5628-8806","affiliations":[{"raw_affiliation_string":"National Engineering Research Center for Big Data Technology and System, Service Computing Technology and System Lab, Cluster and Grid Computing Lab, School of Computer Science and Technology, Huazhong University of Science and Technology, Wuhan, China","institution_ids":["https://openalex.org/I47720641"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Dongao He","orcid":"https://orcid.org/0009-0002-4113-3945"},"institutions":[{"id":"https://openalex.org/I47720641","display_name":"Huazhong University of Science and Technology","ror":"https://ror.org/00p991c53","country_code":"CN","type":"education","lineage":["https://openalex.org/I47720641"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Dongao He","raw_affiliation_strings":["National Engineering Research Center for Big Data Technology and System, Service Computing Technology and System Lab, Cluster and Grid Computing Lab, School of Computer Science and Technology, Huazhong University of Science and Technology, Wuhan, China"],"raw_orcid":"https://orcid.org/0009-0002-4113-3945","affiliations":[{"raw_affiliation_string":"National Engineering Research Center for Big Data Technology and System, Service Computing Technology and System Lab, Cluster and Grid Computing Lab, School of Computer Science and Technology, Huazhong University of Science and Technology, Wuhan, China","institution_ids":["https://openalex.org/I47720641"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5043459840","display_name":"Suyang Wen","orcid":null},"institutions":[{"id":"https://openalex.org/I47720641","display_name":"Huazhong University of Science and Technology","ror":"https://ror.org/00p991c53","country_code":"CN","type":"education","lineage":["https://openalex.org/I47720641"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Suyang Wen","raw_affiliation_strings":["National Engineering Research Center for Big Data Technology and System, Service Computing Technology and System Lab, Cluster and Grid Computing Lab, School of Computer Science and Technology, Huazhong University of Science and Technology, Wuhan, China"],"raw_orcid":"https://orcid.org/0009-0000-4284-738X","affiliations":[{"raw_affiliation_string":"National Engineering Research Center for Big Data Technology and System, Service Computing Technology and System Lab, Cluster and Grid Computing Lab, School of Computer Science and Technology, Huazhong University of Science and Technology, Wuhan, China","institution_ids":["https://openalex.org/I47720641"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Yang Li","orcid":"https://orcid.org/0009-0001-7801-9647"},"institutions":[{"id":"https://openalex.org/I47720641","display_name":"Huazhong University of Science and Technology","ror":"https://ror.org/00p991c53","country_code":"CN","type":"education","lineage":["https://openalex.org/I47720641"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yang Li","raw_affiliation_strings":["National Engineering Research Center for Big Data Technology and System, Service Computing Technology and System Lab, Cluster and Grid Computing Lab, School of Computer Science and Technology, Huazhong University of Science and Technology, Wuhan, China"],"raw_orcid":"https://orcid.org/0009-0001-7801-9647","affiliations":[{"raw_affiliation_string":"National Engineering Research Center for Big Data Technology and System, Service Computing Technology and System Lab, Cluster and Grid Computing Lab, School of Computer Science and Technology, Huazhong University of Science and Technology, Wuhan, China","institution_ids":["https://openalex.org/I47720641"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5022262922","display_name":"Hai Jin","orcid":"https://orcid.org/0000-0002-3934-7605"},"institutions":[{"id":"https://openalex.org/I47720641","display_name":"Huazhong University of Science and Technology","ror":"https://ror.org/00p991c53","country_code":"CN","type":"education","lineage":["https://openalex.org/I47720641"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Hai Jin","raw_affiliation_strings":["National Engineering Research Center for Big Data Technology and System, Service Computing Technology and System Lab, Cluster and Grid Computing Lab, School of Computer Science and Technology, Huazhong University of Science and Technology, Wuhan, China"],"raw_orcid":"https://orcid.org/0000-0002-3934-7605","affiliations":[{"raw_affiliation_string":"National Engineering Research Center for Big Data Technology and System, Service Computing Technology and System Lab, Cluster and Grid Computing Lab, School of Computer Science and Technology, Huazhong University of Science and Technology, Wuhan, China","institution_ids":["https://openalex.org/I47720641"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5066761150","display_name":"Zhiyuan Shao","orcid":"https://orcid.org/0000-0003-2139-6465"},"institutions":[{"id":"https://openalex.org/I47720641","display_name":"Huazhong University of Science and Technology","ror":"https://ror.org/00p991c53","country_code":"CN","type":"education","lineage":["https://openalex.org/I47720641"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Zhiyuan Shao","raw_affiliation_strings":["National Engineering Research Center for Big Data Technology and System, Service Computing Technology and System Lab, Cluster and Grid Computing Lab, School of Computer Science and Technology, Huazhong University of Science and Technology, Wuhan, China"],"raw_orcid":"https://orcid.org/0000-0003-2139-6465","affiliations":[{"raw_affiliation_string":"National Engineering Research Center for Big Data Technology and System, Service Computing Technology and System Lab, Cluster and Grid Computing Lab, School of Computer Science and Technology, Huazhong University of Science and Technology, Wuhan, China","institution_ids":["https://openalex.org/I47720641"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":7,"corresponding_author_ids":["https://openalex.org/A5103275408"],"corresponding_institution_ids":["https://openalex.org/I47720641"],"apc_list":null,"apc_paid":null,"fwci":1.5801,"has_fulltext":false,"cited_by_count":5,"citation_normalized_percentile":{"value":0.86399554,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":96,"max":99},"biblio":{"volume":"22","issue":"1","first_page":"1","last_page":"27"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11273","display_name":"Advanced Graph Neural Networks","score":0.9988999962806702,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11273","display_name":"Advanced Graph Neural Networks","score":0.9988999962806702,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.9958999752998352,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12292","display_name":"Graph Theory and Algorithms","score":0.9952999949455261,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/cuda","display_name":"CUDA","score":0.7491965293884277},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6775723695755005},{"id":"https://openalex.org/keywords/kernel","display_name":"Kernel (algebra)","score":0.5845595598220825},{"id":"https://openalex.org/keywords/graph","display_name":"Graph","score":0.45641475915908813},{"id":"https://openalex.org/keywords/fusion","display_name":"Fusion","score":0.4348306357860565},{"id":"https://openalex.org/keywords/artificial-neural-network","display_name":"Artificial neural network","score":0.41475579142570496},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.33605480194091797},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.3273009657859802},{"id":"https://openalex.org/keywords/theoretical-computer-science","display_name":"Theoretical computer science","score":0.27653273940086365},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.21037426590919495},{"id":"https://openalex.org/keywords/discrete-mathematics","display_name":"Discrete mathematics","score":0.0629379153251648}],"concepts":[{"id":"https://openalex.org/C2778119891","wikidata":"https://www.wikidata.org/wiki/Q477690","display_name":"CUDA","level":2,"score":0.7491965293884277},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6775723695755005},{"id":"https://openalex.org/C74193536","wikidata":"https://www.wikidata.org/wiki/Q574844","display_name":"Kernel (algebra)","level":2,"score":0.5845595598220825},{"id":"https://openalex.org/C132525143","wikidata":"https://www.wikidata.org/wiki/Q141488","display_name":"Graph","level":2,"score":0.45641475915908813},{"id":"https://openalex.org/C158525013","wikidata":"https://www.wikidata.org/wiki/Q2593739","display_name":"Fusion","level":2,"score":0.4348306357860565},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.41475579142570496},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.33605480194091797},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.3273009657859802},{"id":"https://openalex.org/C80444323","wikidata":"https://www.wikidata.org/wiki/Q2878974","display_name":"Theoretical computer science","level":1,"score":0.27653273940086365},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.21037426590919495},{"id":"https://openalex.org/C118615104","wikidata":"https://www.wikidata.org/wiki/Q121416","display_name":"Discrete mathematics","level":1,"score":0.0629379153251648},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3702001","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3702001","pdf_url":null,"source":{"id":"https://openalex.org/S26056741","display_name":"ACM Transactions on Architecture and Code Optimization","issn_l":"1544-3566","issn":["1544-3566","1544-3973"],"is_oa":true,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM Transactions on Architecture and Code Optimization","raw_type":"journal-article"}],"best_oa_location":{"id":"doi:10.1145/3702001","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3702001","pdf_url":null,"source":{"id":"https://openalex.org/S26056741","display_name":"ACM Transactions on Architecture and Code Optimization","issn_l":"1544-3566","issn":["1544-3566","1544-3973"],"is_oa":true,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM Transactions on Architecture and Code Optimization","raw_type":"journal-article"},"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G4410170455","display_name":null,"funder_award_id":"62372199","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":29,"referenced_works":["https://openalex.org/W1493774699","https://openalex.org/W1583837637","https://openalex.org/W2322411027","https://openalex.org/W2595233870","https://openalex.org/W2602856279","https://openalex.org/W2915063781","https://openalex.org/W2945827377","https://openalex.org/W3000310304","https://openalex.org/W3044913359","https://openalex.org/W3081191522","https://openalex.org/W3090369187","https://openalex.org/W3101553402","https://openalex.org/W3106125969","https://openalex.org/W3132185085","https://openalex.org/W3162147375","https://openalex.org/W3206743063","https://openalex.org/W4221106024","https://openalex.org/W4229779967","https://openalex.org/W4245145882","https://openalex.org/W4285606578","https://openalex.org/W4290000885","https://openalex.org/W4309672181","https://openalex.org/W4310054974","https://openalex.org/W4312191280","https://openalex.org/W4312901420","https://openalex.org/W4317931697","https://openalex.org/W4318541522","https://openalex.org/W4393378444","https://openalex.org/W6859094688"],"related_works":["https://openalex.org/W4399354997","https://openalex.org/W3062287","https://openalex.org/W2380390332","https://openalex.org/W2742145873","https://openalex.org/W2023572661","https://openalex.org/W2005148983","https://openalex.org/W2012954338","https://openalex.org/W2096672917","https://openalex.org/W2392023973","https://openalex.org/W2076165488"],"abstract_inverted_index":{"Graph":[0],"Neural":[1],"Networks":[2],"(GNNs)":[3],"have":[4],"achieved":[5],"remarkable":[6],"successes":[7],"in":[8,40,54,122,140],"various":[9],"graph-based":[10],"learning":[11],"tasks,":[12],"thanks":[13],"to":[14,17,48,85,110,127,147,162,184],"their":[15,148],"ability":[16],"leverage":[18],"advanced":[19,32,80],"GPUs.":[20,41],"However,":[21],"GNNs":[22],"currently":[23],"face":[24],"challenges":[25,43],"arising":[26],"from":[27,58],"the":[28,59,77,87,92,112,129,136,188,218],"concurrent":[29],"use":[30],"of":[31,66,79,94,132,176,190,211],"Tensor":[33],"Cores":[34,38],"(TCs)":[35],"and":[36,51,62,82,144,160,205],"CUDA":[37],"(CDs)":[39],"These":[42],"are":[44],"further":[45,119],"exacerbated":[46],"due":[47],"repeated,":[49],"inefficient,":[50],"redundant":[52],"aggregations":[53,124],"GNN":[55,73,101,199],"that":[56,168],"result":[57],"high":[60],"sparsity":[61,131,149],"irregular":[63],"non-zero":[64],"distribution":[65],"real-world":[67],"graphs.":[68],"We":[69],"propose":[70],"RT-GNN,":[71],"a":[72,99,151],"framework":[74],"based":[75],"on":[76,187],"fusion":[78],"TC":[81],"CD":[83],"units,":[84],"eliminate":[86],"aforementioned":[88],"redundancies":[89],"by":[90,150,172,207],"exploiting":[91],"properties":[93],"an":[95,173,208],"adjacency":[96],"matrix.":[97],"First,":[98],"novel":[100],"representation":[102],"technique,":[103],"hierarchical":[104],"embedding":[105],"graph":[106],"(HEG)":[107],"is":[108],"proposed":[109],"manage":[111],"intermediate":[113,123],"aggregation":[114],"results":[115,166],"hierarchically,":[116],"which":[117,157],"can":[118],"avoid":[120],"redundancy":[121,179],"elegantly.":[125],"Next,":[126],"address":[128],"inherent":[130],"graphs,":[133],"RT-GNN":[134,196],"places":[135],"blocks":[137],"(a.k.a.":[138],"tiles)":[139],"HEG":[141,169],"onto":[142],"TCs":[143,159],"CDs":[145,161],"according":[146],"new":[152],"block-based":[153],"row-wise":[154],"multiplication":[155],"approach,":[156],"assembles":[158],"work":[163],"concurrently.":[164],"Experimental":[165],"demonstrate":[167],"outperforms":[170,197],"HAG":[171],"average":[174,209],"speedup":[175,186],"19.3\u00d7":[177],"for":[178,193],"elimination":[180],"performance,":[181,195],"especially":[182],"up":[183],"72\u00d7":[185],"dataset":[189],"ARXIV.":[191],"Moreover,":[192],"overall":[194],"state-of-the-art":[198],"frameworks":[200],"(including":[201],"DGL,":[202],"HAG,":[203],"GNNAdvisor,":[204],"TC-GNN)":[206],"factor":[210],"3.1\u00d7":[212],"while":[213],"maintaining":[214],"or":[215],"even":[216],"improving":[217],"task":[219],"accuracy.":[220]},"counts_by_year":[{"year":2026,"cited_by_count":2},{"year":2025,"cited_by_count":3}],"updated_date":"2026-05-21T09:19:25.381259","created_date":"2025-10-10T00:00:00"}
