{"id":"https://openalex.org/W4318256780","doi":"https://doi.org/10.1145/3559009.3569690","title":"Optimizing Aggregate Computation of Graph Neural Networks with on-GPU Interpreter-Style Programming","display_name":"Optimizing Aggregate Computation of Graph Neural Networks with on-GPU Interpreter-Style Programming","publication_year":2022,"publication_date":"2022-10-08","ids":{"openalex":"https://openalex.org/W4318256780","doi":"https://doi.org/10.1145/3559009.3569690"},"language":"en","primary_location":{"id":"doi:10.1145/3559009.3569690","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3559009.3569690","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3559009.3569690","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the International Conference on Parallel Architectures and Compilation Techniques","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://dl.acm.org/doi/pdf/10.1145/3559009.3569690","any_repository_has_fulltext":null},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5030309370","display_name":"Zhuoran Ji","orcid":"https://orcid.org/0000-0001-9767-2767"},"institutions":[{"id":"https://openalex.org/I889458895","display_name":"University of Hong Kong","ror":"https://ror.org/02zhqgq86","country_code":"HK","type":"education","lineage":["https://openalex.org/I889458895"]}],"countries":["HK"],"is_corresponding":true,"raw_author_name":"Zhuoran Ji","raw_affiliation_strings":["The University of Hong Kong, Hong Kong, China"],"affiliations":[{"raw_affiliation_string":"The University of Hong Kong, Hong Kong, China","institution_ids":["https://openalex.org/I889458895"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5113952625","display_name":"Cho\u2010Li Wang","orcid":null},"institutions":[{"id":"https://openalex.org/I889458895","display_name":"University of Hong Kong","ror":"https://ror.org/02zhqgq86","country_code":"HK","type":"education","lineage":["https://openalex.org/I889458895"]}],"countries":["HK"],"is_corresponding":false,"raw_author_name":"Cho-Li Wang","raw_affiliation_strings":["The University of Hong Kong, Hong Kong, China"],"affiliations":[{"raw_affiliation_string":"The University of Hong Kong, Hong Kong, China","institution_ids":["https://openalex.org/I889458895"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5030309370"],"corresponding_institution_ids":["https://openalex.org/I889458895"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.15490348,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"83","last_page":"95"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11273","display_name":"Advanced Graph Neural Networks","score":0.9994999766349792,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11273","display_name":"Advanced Graph Neural Networks","score":0.9994999766349792,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12292","display_name":"Graph Theory and Algorithms","score":0.996399998664856,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12808","display_name":"Ferroelectric and Negative Capacitance Devices","score":0.9944000244140625,"subfield":{"id":"https://openalex.org/subfields/2208","display_name":"Electrical and Electronic Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8850589990615845},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.7192369103431702},{"id":"https://openalex.org/keywords/speedup","display_name":"Speedup","score":0.5334553718566895},{"id":"https://openalex.org/keywords/tree-traversal","display_name":"Tree traversal","score":0.46685463190078735},{"id":"https://openalex.org/keywords/computation","display_name":"Computation","score":0.45234692096710205},{"id":"https://openalex.org/keywords/distributed-memory","display_name":"Distributed memory","score":0.42801859974861145},{"id":"https://openalex.org/keywords/theoretical-computer-science","display_name":"Theoretical computer science","score":0.41543513536453247},{"id":"https://openalex.org/keywords/shared-memory","display_name":"Shared memory","score":0.39480873942375183},{"id":"https://openalex.org/keywords/programming-language","display_name":"Programming language","score":0.3439804017543793}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8850589990615845},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.7192369103431702},{"id":"https://openalex.org/C68339613","wikidata":"https://www.wikidata.org/wiki/Q1549489","display_name":"Speedup","level":2,"score":0.5334553718566895},{"id":"https://openalex.org/C140745168","wikidata":"https://www.wikidata.org/wiki/Q1210082","display_name":"Tree traversal","level":2,"score":0.46685463190078735},{"id":"https://openalex.org/C45374587","wikidata":"https://www.wikidata.org/wiki/Q12525525","display_name":"Computation","level":2,"score":0.45234692096710205},{"id":"https://openalex.org/C91481028","wikidata":"https://www.wikidata.org/wiki/Q1054686","display_name":"Distributed memory","level":3,"score":0.42801859974861145},{"id":"https://openalex.org/C80444323","wikidata":"https://www.wikidata.org/wiki/Q2878974","display_name":"Theoretical computer science","level":1,"score":0.41543513536453247},{"id":"https://openalex.org/C133875982","wikidata":"https://www.wikidata.org/wiki/Q764810","display_name":"Shared memory","level":2,"score":0.39480873942375183},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.3439804017543793}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3559009.3569690","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3559009.3569690","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3559009.3569690","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the International Conference on Parallel Architectures and Compilation Techniques","raw_type":"proceedings-article"}],"best_oa_location":{"id":"doi:10.1145/3559009.3569690","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3559009.3569690","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3559009.3569690","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the International Conference on Parallel Architectures and Compilation Techniques","raw_type":"proceedings-article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":true,"grobid_xml":false},"content_urls":{"pdf":"https://content.openalex.org/works/W4318256780.pdf"},"referenced_works_count":24,"referenced_works":["https://openalex.org/W1978555214","https://openalex.org/W1988307172","https://openalex.org/W1997162567","https://openalex.org/W2021211271","https://openalex.org/W2104680817","https://openalex.org/W2117539524","https://openalex.org/W2134427337","https://openalex.org/W2534888058","https://openalex.org/W2551706664","https://openalex.org/W2576259597","https://openalex.org/W2786016794","https://openalex.org/W2804032941","https://openalex.org/W2807021761","https://openalex.org/W2945827377","https://openalex.org/W2951135776","https://openalex.org/W3009233884","https://openalex.org/W3017228913","https://openalex.org/W3037699692","https://openalex.org/W3090912412","https://openalex.org/W3100848837","https://openalex.org/W3101553402","https://openalex.org/W3105753905","https://openalex.org/W4221106024","https://openalex.org/W4300011764"],"related_works":["https://openalex.org/W2317245370","https://openalex.org/W4249323025","https://openalex.org/W198851386","https://openalex.org/W2030310580","https://openalex.org/W947442053","https://openalex.org/W2070983336","https://openalex.org/W1837558792","https://openalex.org/W2363502211","https://openalex.org/W1982466989","https://openalex.org/W2995170196"],"abstract_inverted_index":{"Graph":[0],"Neural":[1],"Networks":[2],"(GNNs)":[3],"generalize":[4],"deep":[5,251],"learning":[6],"to":[7,90,175,198,216,239],"graph-structured":[8],"data":[9,200],"and":[10,31,113,150,183,193,210,247],"show":[11],"great":[12],"success":[13],"in":[14,159],"many":[15],"tasks.":[16],"However,":[17],"their":[18],"irregular":[19,35],"aggregation":[20,72],"kernels":[21,36,170],"make":[22],"them":[23,54],"inefficient":[24],"on":[25,231],"GPUs.":[26],"The":[27,143,165,236],"unpredictable":[28],"control":[29],"flow":[30],"memory":[32,57,196,218],"references":[33],"of":[34,124,139],"prohibit":[37],"most":[38],"optimizations":[39,79,166],"designed":[40,80,167],"for":[41,81,168,207,233,241,250],"regular":[42,82,169],"ones.":[43],"For":[44],"example,":[45],"even":[46],"if":[47],"the":[48,61,78,92,95,100,104,109,120,134,140,151,176,213,223],"nodes":[49,192,204],"have":[50],"overlapped":[51],"neighbors,":[52],"reusing":[53],"via":[55],"shared":[56,195],"is":[58,111,146,181],"non-trivial,":[59],"as":[60,136,157,179],"neighborhoods":[62],"used":[63],"are":[64],"runtime":[65],"information.":[66],"This":[67],"paper":[68],"presents":[69],"regGNN,":[70],"an":[71,129,137,148],"implementation":[73],"that":[74],"can":[75,154,171],"benefit":[76],"from":[77,99],"kernels.":[83],"It":[84],"proposes":[85],"a":[86,160],"concept":[87],"named":[88],"\"semi-regular\"":[89],"describe":[91],"aggregate":[93,121,130,152,177,214],"computation:":[94],"irregularity":[96],"only":[97],"comes":[98],"neighborhood":[101],"traversal;":[102],"aggregating":[103],"high-dimensional":[105],"vectors,":[106],"which":[107,132],"dominates":[108],"computation,":[110],"data-independent":[112],"thus":[114],"incurs":[115],"no":[116],"irregularity.":[117],"regGNN":[118,185,227],"encodes":[119],"computation":[122],"steps":[123],"each":[125],"thread":[126],"block":[127],"into":[128],"script,":[131,178],"replaces":[133],"graph":[135],"input":[138],"GPU":[141,144,162],"kernel.":[142],"kernel":[145],"like":[147],"interpreter,":[149],"script":[153,215],"be":[155,173],"regarded":[156],"written":[158],"simple":[161],"scripting":[163],"language.":[164],"then":[172],"applied":[174],"it":[180],"static":[182],"regular.":[184],"demonstrates":[186],"three":[187],"optimizations:":[188],"(1)":[189],"intelligently":[190],"scheduling":[191],"customizing":[194],"replacement":[197],"maximize":[199],"reuse,":[201],"(2)":[202],"reassigning":[203],"among":[205],"warps":[206],"load":[208],"balancing,":[209],"(3)":[211],"aligning":[212],"improve":[217],"latency":[219],"hiding.":[220],"Compared":[221],"with":[222,243],"state-of-the-art":[224],"GNN":[225],"frameworks,":[226],"achieves":[228],"2.81\u00d7":[229],"throughput":[230],"average":[232],"moderate-scale":[234],"GNNs.":[235,252],"speedup":[237],"increases":[238],"5.21\u00d7":[240],"GNNs":[242],"small":[244],"hidden":[245],"sizes":[246],"100s":[248],"\u00d7":[249]},"counts_by_year":[],"updated_date":"2025-12-24T23:09:58.560324","created_date":"2025-10-10T00:00:00"}
