{"id":"https://openalex.org/W4388738128","doi":"https://doi.org/10.1145/3606043.3606051","title":"Efficient SpMM with Kernel Switching on GPUs for Graph Neural Networks","display_name":"Efficient SpMM with Kernel Switching on GPUs for Graph Neural Networks","publication_year":2023,"publication_date":"2023-06-17","ids":{"openalex":"https://openalex.org/W4388738128","doi":"https://doi.org/10.1145/3606043.3606051"},"language":"en","primary_location":{"id":"doi:10.1145/3606043.3606051","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3606043.3606051","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2023 7th International Conference on High Performance Compilation, Computing and Communications","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5101534224","display_name":"Hang Yu","orcid":"https://orcid.org/0009-0005-0100-7989"},"institutions":[{"id":"https://openalex.org/I116265982","display_name":"Qinghai University","ror":"https://ror.org/05h33bt13","country_code":"CN","type":"education","lineage":["https://openalex.org/I116265982"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Hang Yu","raw_affiliation_strings":["Qinghai University, China"],"affiliations":[{"raw_affiliation_string":"Qinghai University, China","institution_ids":["https://openalex.org/I116265982"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5003261630","display_name":"Haodong Bian","orcid":"https://orcid.org/0000-0003-0907-288X"},"institutions":[{"id":"https://openalex.org/I116265982","display_name":"Qinghai University","ror":"https://ror.org/05h33bt13","country_code":"CN","type":"education","lineage":["https://openalex.org/I116265982"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Haodong Bian","raw_affiliation_strings":["Qinghai University, China"],"affiliations":[{"raw_affiliation_string":"Qinghai University, China","institution_ids":["https://openalex.org/I116265982"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5101945411","display_name":"Jianqiang Huang","orcid":"https://orcid.org/0000-0002-4454-7919"},"institutions":[{"id":"https://openalex.org/I116265982","display_name":"Qinghai University","ror":"https://ror.org/05h33bt13","country_code":"CN","type":"education","lineage":["https://openalex.org/I116265982"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jianqiang Huang","raw_affiliation_strings":["Qinghai University, China"],"affiliations":[{"raw_affiliation_string":"Qinghai University, China","institution_ids":["https://openalex.org/I116265982"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5101534224"],"corresponding_institution_ids":["https://openalex.org/I116265982"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.14751805,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"56","last_page":"61"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12292","display_name":"Graph Theory and Algorithms","score":0.9995999932289124,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T12292","display_name":"Graph Theory and Algorithms","score":0.9995999932289124,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11273","display_name":"Advanced Graph Neural Networks","score":0.9994999766349792,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11948","display_name":"Machine Learning in Materials Science","score":0.9814000129699707,"subfield":{"id":"https://openalex.org/subfields/2505","display_name":"Materials Chemistry"},"field":{"id":"https://openalex.org/fields/25","display_name":"Materials Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7343960404396057},{"id":"https://openalex.org/keywords/sparse-matrix","display_name":"Sparse matrix","score":0.6265114545822144},{"id":"https://openalex.org/keywords/speedup","display_name":"Speedup","score":0.5475614070892334},{"id":"https://openalex.org/keywords/kernel","display_name":"Kernel (algebra)","score":0.5310544967651367},{"id":"https://openalex.org/keywords/artificial-neural-network","display_name":"Artificial neural network","score":0.47496554255485535},{"id":"https://openalex.org/keywords/graph-theory","display_name":"Graph theory","score":0.4284375011920929},{"id":"https://openalex.org/keywords/symmetric-matrix","display_name":"Symmetric matrix","score":0.42753279209136963},{"id":"https://openalex.org/keywords/graph","display_name":"Graph","score":0.4235844612121582},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.4074026942253113},{"id":"https://openalex.org/keywords/theoretical-computer-science","display_name":"Theoretical computer science","score":0.3745998740196228},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.3575246036052704},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.3037840723991394},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.15545475482940674},{"id":"https://openalex.org/keywords/discrete-mathematics","display_name":"Discrete mathematics","score":0.1055586040019989}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7343960404396057},{"id":"https://openalex.org/C56372850","wikidata":"https://www.wikidata.org/wiki/Q1050404","display_name":"Sparse matrix","level":3,"score":0.6265114545822144},{"id":"https://openalex.org/C68339613","wikidata":"https://www.wikidata.org/wiki/Q1549489","display_name":"Speedup","level":2,"score":0.5475614070892334},{"id":"https://openalex.org/C74193536","wikidata":"https://www.wikidata.org/wiki/Q574844","display_name":"Kernel (algebra)","level":2,"score":0.5310544967651367},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.47496554255485535},{"id":"https://openalex.org/C88230418","wikidata":"https://www.wikidata.org/wiki/Q131476","display_name":"Graph theory","level":2,"score":0.4284375011920929},{"id":"https://openalex.org/C54848796","wikidata":"https://www.wikidata.org/wiki/Q339011","display_name":"Symmetric matrix","level":3,"score":0.42753279209136963},{"id":"https://openalex.org/C132525143","wikidata":"https://www.wikidata.org/wiki/Q141488","display_name":"Graph","level":2,"score":0.4235844612121582},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.4074026942253113},{"id":"https://openalex.org/C80444323","wikidata":"https://www.wikidata.org/wiki/Q2878974","display_name":"Theoretical computer science","level":1,"score":0.3745998740196228},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.3575246036052704},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.3037840723991394},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.15545475482940674},{"id":"https://openalex.org/C118615104","wikidata":"https://www.wikidata.org/wiki/Q121416","display_name":"Discrete mathematics","level":1,"score":0.1055586040019989},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C158693339","wikidata":"https://www.wikidata.org/wiki/Q190524","display_name":"Eigenvalues and eigenvectors","level":2,"score":0.0},{"id":"https://openalex.org/C163716315","wikidata":"https://www.wikidata.org/wiki/Q901177","display_name":"Gaussian","level":2,"score":0.0},{"id":"https://openalex.org/C114614502","wikidata":"https://www.wikidata.org/wiki/Q76592","display_name":"Combinatorics","level":1,"score":0.0},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3606043.3606051","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3606043.3606051","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2023 7th International Conference on High Performance Compilation, Computing and Communications","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G3005660821","display_name":null,"funder_award_id":"No.62062059, No.62162053","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":10,"referenced_works":["https://openalex.org/W1985938427","https://openalex.org/W1996274599","https://openalex.org/W2043420024","https://openalex.org/W2128853364","https://openalex.org/W2751808960","https://openalex.org/W2914631005","https://openalex.org/W2951135776","https://openalex.org/W4236418138","https://openalex.org/W4247712932","https://openalex.org/W4310895557"],"related_works":["https://openalex.org/W2058965144","https://openalex.org/W2164382479","https://openalex.org/W2146343568","https://openalex.org/W98480971","https://openalex.org/W2150291671","https://openalex.org/W2013643406","https://openalex.org/W2805834348","https://openalex.org/W2069857942","https://openalex.org/W4256089310","https://openalex.org/W4292439716"],"abstract_inverted_index":{"Graph":[0],"neural":[1,16,44,60],"networks":[2,17,45],"are":[3,46],"frameworks":[4],"for":[5,55,112,156,184,210],"deep":[6],"learning":[7],"on":[8,48,97,139,150,219],"graph-structured":[9],"data.":[10],"When":[11],"updating":[12],"node":[13],"information,":[14],"graph":[15,43,59],"need":[18],"to":[19,65,94,164,208],"aggregate":[20],"information":[21],"from":[22,193],"peripheral":[23],"node,":[24],"which":[25],"can":[26],"be":[27],"represented":[28],"mathematically":[29],"as":[30],"a":[31,36,92],"sparse":[32,74,103,116,120,175,186],"matrix":[33,104,121],"multiplied":[34],"by":[35,72,217],"dense":[37],"matrix.":[38],"Because":[39,118],"many":[40],"calculations":[41],"in":[42],"based":[47,138,149],"SpMM-like":[49],"operations,":[50],"accelerating":[51],"SpMM":[52,96,211],"is":[53,83,124,131,137,148,161,182],"significant":[54],"overall":[56],"acceleration":[57],"of":[58,79,115,168,173,205],"network":[61],"training.":[62],"However,":[63],"due":[64],"the":[66,73,76,119,140,144,151,157,165,174,178,194,213],"highly":[67],"irregular":[68],"memory":[69],"access":[70],"caused":[71],"matrix,":[75,176],"data":[77,129],"locality":[78],"this":[80,87],"computing":[81],"kernel":[82,181],"poor.":[84],"To":[85],"address":[86],"issue,":[88],"we":[89],"have":[90],"designed":[91],"method":[93,100,136,147,160],"accelerate":[95],"GPU.":[98,223],"Our":[99],"adopts":[101],"two":[102,108],"storage":[105,122],"formats":[106],"and":[107,177],"task":[109],"partitioning":[110],"methods":[111],"different":[113,185],"types":[114],"matrices.":[117,187],"format":[123],"commonly":[125],"used,":[126],"no":[127],"additional":[128],"conversion":[130],"required.":[132],"The":[133,154],"row":[134,172],"splitting":[135,146,159],"CSR":[141],"format,":[142],"while":[143],"non-zero":[145,158,169],"COO":[152],"format.":[153],"threshold":[155],"set":[162],"according":[163],"average":[166,203],"number":[167],"entries":[170],"per":[171],"optimal":[179],"calculation":[180],"selected":[183],"Experimental":[188],"evaluation":[189],"using":[190],"real-world":[191],"graphs":[192],"SuiteSparse":[195],"collection":[196],"shows":[197],"that":[198],"our":[199],"technique":[200],"achieves":[201],"an":[202,220],"speedup":[204],"1.43x":[206],"(up":[207],"2.26x)":[209],"over":[212],"vendor-optimized":[214],"library":[215],"cuSPARSE[1]":[216],"NVIDIA":[218,221],"V100":[222]},"counts_by_year":[],"updated_date":"2026-03-27T05:58:40.876381","created_date":"2025-10-10T00:00:00"}
