{"id":"https://openalex.org/W4408222700","doi":"https://doi.org/10.1109/tpds.2025.3549180","title":"Graphite: Hardware-Aware GNN Reshaping for Acceleration With GPU Tensor Cores","display_name":"Graphite: Hardware-Aware GNN Reshaping for Acceleration With GPU Tensor Cores","publication_year":2025,"publication_date":"2025-03-07","ids":{"openalex":"https://openalex.org/W4408222700","doi":"https://doi.org/10.1109/tpds.2025.3549180"},"language":"en","primary_location":{"id":"doi:10.1109/tpds.2025.3549180","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tpds.2025.3549180","pdf_url":null,"source":{"id":"https://openalex.org/S97130795","display_name":"IEEE Transactions on Parallel and Distributed Systems","issn_l":"1045-9219","issn":["1045-9219","1558-2183","2161-9883"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Parallel and Distributed Systems","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5100607173","display_name":"Hyeonjin Kim","orcid":"https://orcid.org/0000-0001-5671-1870"},"institutions":[{"id":"https://openalex.org/I193775966","display_name":"Yonsei University","ror":"https://ror.org/01wjejq96","country_code":"KR","type":"education","lineage":["https://openalex.org/I193775966"]}],"countries":["KR"],"is_corresponding":true,"raw_author_name":"Hyeonjin Kim","raw_affiliation_strings":["School of Electrical and Electronic Engineering, Yonsei University, Seoul, South Korea"],"affiliations":[{"raw_affiliation_string":"School of Electrical and Electronic Engineering, Yonsei University, Seoul, South Korea","institution_ids":["https://openalex.org/I193775966"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5111436427","display_name":"Taesoo Lim","orcid":"https://orcid.org/0000-0002-2472-595X"},"institutions":[{"id":"https://openalex.org/I193775966","display_name":"Yonsei University","ror":"https://ror.org/01wjejq96","country_code":"KR","type":"education","lineage":["https://openalex.org/I193775966"]}],"countries":["KR"],"is_corresponding":false,"raw_author_name":"Taesoo Lim","raw_affiliation_strings":["School of Electrical and Electronic Engineering, Yonsei University, Seoul, South Korea"],"affiliations":[{"raw_affiliation_string":"School of Electrical and Electronic Engineering, Yonsei University, Seoul, South Korea","institution_ids":["https://openalex.org/I193775966"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5028411609","display_name":"William J. Song","orcid":"https://orcid.org/0000-0001-9170-5986"},"institutions":[{"id":"https://openalex.org/I193775966","display_name":"Yonsei University","ror":"https://ror.org/01wjejq96","country_code":"KR","type":"education","lineage":["https://openalex.org/I193775966"]}],"countries":["KR"],"is_corresponding":false,"raw_author_name":"William J. Song","raw_affiliation_strings":["School of Electrical and Electronic Engineering, Yonsei University, Seoul, South Korea"],"affiliations":[{"raw_affiliation_string":"School of Electrical and Electronic Engineering, Yonsei University, Seoul, South Korea","institution_ids":["https://openalex.org/I193775966"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5100607173"],"corresponding_institution_ids":["https://openalex.org/I193775966"],"apc_list":null,"apc_paid":null,"fwci":2.2703,"has_fulltext":false,"cited_by_count":1,"citation_normalized_percentile":{"value":0.82512521,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":91,"max":95},"biblio":{"volume":"36","issue":"5","first_page":"918","last_page":"931"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.9984999895095825,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.9984999895095825,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10904","display_name":"Embedded Systems Design Techniques","score":0.9747999906539917,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.9384999871253967,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/acceleration","display_name":"Acceleration","score":0.7743516564369202},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7296633720397949},{"id":"https://openalex.org/keywords/hardware-acceleration","display_name":"Hardware acceleration","score":0.666848361492157},{"id":"https://openalex.org/keywords/graphite","display_name":"Graphite","score":0.5444137454032898},{"id":"https://openalex.org/keywords/computational-science","display_name":"Computational science","score":0.516922652721405},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.5040911436080933},{"id":"https://openalex.org/keywords/general-purpose-computing-on-graphics-processing-units","display_name":"General-purpose computing on graphics processing units","score":0.41921138763427734},{"id":"https://openalex.org/keywords/tensor","display_name":"Tensor (intrinsic definition)","score":0.41920116543769836},{"id":"https://openalex.org/keywords/embedded-system","display_name":"Embedded system","score":0.35561883449554443},{"id":"https://openalex.org/keywords/computer-graphics","display_name":"Computer graphics (images)","score":0.3059370517730713},{"id":"https://openalex.org/keywords/field-programmable-gate-array","display_name":"Field-programmable gate array","score":0.297809898853302},{"id":"https://openalex.org/keywords/materials-science","display_name":"Materials science","score":0.20946255326271057},{"id":"https://openalex.org/keywords/physics","display_name":"Physics","score":0.16890552639961243},{"id":"https://openalex.org/keywords/graphics","display_name":"Graphics","score":0.127592533826828}],"concepts":[{"id":"https://openalex.org/C117896860","wikidata":"https://www.wikidata.org/wiki/Q11376","display_name":"Acceleration","level":2,"score":0.7743516564369202},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7296633720397949},{"id":"https://openalex.org/C13164978","wikidata":"https://www.wikidata.org/wiki/Q600158","display_name":"Hardware acceleration","level":3,"score":0.666848361492157},{"id":"https://openalex.org/C2779698641","wikidata":"https://www.wikidata.org/wiki/Q5309","display_name":"Graphite","level":2,"score":0.5444137454032898},{"id":"https://openalex.org/C459310","wikidata":"https://www.wikidata.org/wiki/Q117801","display_name":"Computational science","level":1,"score":0.516922652721405},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.5040911436080933},{"id":"https://openalex.org/C50630238","wikidata":"https://www.wikidata.org/wiki/Q971505","display_name":"General-purpose computing on graphics processing units","level":3,"score":0.41921138763427734},{"id":"https://openalex.org/C155281189","wikidata":"https://www.wikidata.org/wiki/Q3518150","display_name":"Tensor (intrinsic definition)","level":2,"score":0.41920116543769836},{"id":"https://openalex.org/C149635348","wikidata":"https://www.wikidata.org/wiki/Q193040","display_name":"Embedded system","level":1,"score":0.35561883449554443},{"id":"https://openalex.org/C121684516","wikidata":"https://www.wikidata.org/wiki/Q7600677","display_name":"Computer graphics (images)","level":1,"score":0.3059370517730713},{"id":"https://openalex.org/C42935608","wikidata":"https://www.wikidata.org/wiki/Q190411","display_name":"Field-programmable gate array","level":2,"score":0.297809898853302},{"id":"https://openalex.org/C192562407","wikidata":"https://www.wikidata.org/wiki/Q228736","display_name":"Materials science","level":0,"score":0.20946255326271057},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.16890552639961243},{"id":"https://openalex.org/C21442007","wikidata":"https://www.wikidata.org/wiki/Q1027879","display_name":"Graphics","level":2,"score":0.127592533826828},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.0},{"id":"https://openalex.org/C202444582","wikidata":"https://www.wikidata.org/wiki/Q837863","display_name":"Pure mathematics","level":1,"score":0.0},{"id":"https://openalex.org/C159985019","wikidata":"https://www.wikidata.org/wiki/Q181790","display_name":"Composite material","level":1,"score":0.0},{"id":"https://openalex.org/C74650414","wikidata":"https://www.wikidata.org/wiki/Q11397","display_name":"Classical mechanics","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/tpds.2025.3549180","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tpds.2025.3549180","pdf_url":null,"source":{"id":"https://openalex.org/S97130795","display_name":"IEEE Transactions on Parallel and Distributed Systems","issn_l":"1045-9219","issn":["1045-9219","1558-2183","2161-9883"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Parallel and Distributed Systems","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[{"display_name":"Affordable and clean energy","id":"https://metadata.un.org/sdg/7","score":0.8600000143051147}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":38,"referenced_works":["https://openalex.org/W1932742904","https://openalex.org/W2008620264","https://openalex.org/W2015953751","https://openalex.org/W2047060659","https://openalex.org/W2079038734","https://openalex.org/W2127558904","https://openalex.org/W2142444503","https://openalex.org/W2153959628","https://openalex.org/W2161902954","https://openalex.org/W2444127451","https://openalex.org/W2558632230","https://openalex.org/W2769133055","https://openalex.org/W2807021761","https://openalex.org/W3017228913","https://openalex.org/W3043303806","https://openalex.org/W3102337469","https://openalex.org/W3102510044","https://openalex.org/W3132695675","https://openalex.org/W3158112431","https://openalex.org/W4327694885","https://openalex.org/W4360831960","https://openalex.org/W4360831975","https://openalex.org/W4384705444","https://openalex.org/W4391986945","https://openalex.org/W6602764823","https://openalex.org/W6726873649","https://openalex.org/W6738964360","https://openalex.org/W6745085599","https://openalex.org/W6745537798","https://openalex.org/W6754929296","https://openalex.org/W6760045743","https://openalex.org/W6764171799","https://openalex.org/W6765543928","https://openalex.org/W6767710714","https://openalex.org/W6771621015","https://openalex.org/W6780489652","https://openalex.org/W6797677657","https://openalex.org/W6810665531"],"related_works":["https://openalex.org/W2505380084","https://openalex.org/W4390373703","https://openalex.org/W2347629889","https://openalex.org/W2011217785","https://openalex.org/W2080700917","https://openalex.org/W1656096860","https://openalex.org/W2095928260","https://openalex.org/W2268149564","https://openalex.org/W1984739956","https://openalex.org/W2763312740"],"abstract_inverted_index":{"Graph":[0],"neural":[1,67],"networks":[2],"(GNNs)":[3],"have":[4],"emerged":[5],"as":[6,41,63],"powerful":[7],"tools":[8],"for":[9,140,213],"addressing":[10],"non-euclidean":[11],"problems.":[12],"GNNs":[13],"operate":[14,197],"through":[15,164],"two":[16],"key":[17],"execution":[18],"phases:":[19],"i)":[20],"aggregation":[21,27,87],"and":[22,50,112,186,221],"ii)":[23],"combination.":[24],"In":[25],"the":[26,29,59,74,86,107,230],"phase,":[28],"feature":[30,52,61],"data":[31],"of":[32,109,211],"neighboring":[33],"graph":[34,83,148,152],"nodes":[35],"are":[36],"gathered,":[37],"which":[38],"is":[39,77],"expressed":[40],"sparse-dense":[42],"matrix":[43,49,76],"multiplication":[44],"(SpMM)":[45],"between":[46],"an":[47,207,225],"adjacency":[48,75,214],"a":[51,66,89,101],"embedding":[53,62],"table.":[54],"The":[55],"combination":[56],"phase":[57,88],"takes":[58],"aggregated":[60],"input":[64],"to":[65,81,105,118,154],"network":[68],"model":[69],"with":[70,158,219],"learnable":[71],"weights.":[72],"Typically,":[73],"extremely":[78],"sparse":[79,147,156,177],"due":[80],"inherent":[82],"structures,":[84],"making":[85],"significant":[90],"bottleneck":[91],"in":[92,184],"GNN":[93,102,141,233],"computations.":[94],"This":[95],"paper":[96],"introduces":[97],"<italic":[98,144,169,187],"xmlns:mml=\"http://www.w3.org/1998/Math/MathML\"":[99,145,170,188],"xmlns:xlink=\"http://www.w3.org/1999/xlink\">Graphite</i>,":[100],"acceleration":[103,128,163],"framework":[104],"overcome":[106],"challenge":[108],"SpMM":[110],"operations":[111,157],"enable":[113],"graphics":[114],"processing":[115],"units":[116,129],"(GPUs)":[117],"exploit":[119],"massive":[120],"thread-level":[121],"parallelism":[122],"more":[123],"efficiently":[124],"via":[125],"existing":[126],"dense":[127,159],"(i.e.,":[130],"tensor":[131,166],"cores).":[132],"To":[133],"that":[134,196,204],"end,":[135],"Graphite":[136,205,223],"employs":[137],"three":[138],"techniques":[139],"acceleration.":[142],"First,":[143],"xmlns:xlink=\"http://www.w3.org/1999/xlink\">hardware-aware":[146],"reshaping":[149],"(HAS)</i>":[150],"rearranges":[151],"structures":[153],"replace":[155],"computations,":[160],"enabling":[161],"hardware":[162],"GPU":[165],"cores.":[167],"Additionally,":[168],"xmlns:xlink=\"http://www.w3.org/1999/xlink\">balanced":[171],"thread":[172,178],"block":[173],"scheduling":[174],"(BTS)</i>":[175],"distributes":[176],"blocks":[179],"evenly":[180],"across":[181],"streaming":[182],"multiprocessors":[183],"GPUs,":[185],"xmlns:xlink=\"http://www.w3.org/1999/xlink\">zero-aware":[189],"warp":[190],"skipping":[191],"(ZAWS)</i>":[192],"eliminates":[193],"ineffective":[194],"threads":[195],"on":[198],"meaningless":[199],"zeros.":[200],"Experimental":[201],"results":[202],"show":[203],"achieves":[206],"average":[208,226],"compression":[209],"rate":[210],"84.1%":[212],"matrices":[215],"using":[216],"HAS.":[217],"Combined":[218],"BTS":[220],"ZAWS,":[222],"delivers":[224],"1.55x":[227],"speedup":[228],"over":[229],"conventional":[231],"SpMM-based":[232],"computation":[234],"method.":[235]},"counts_by_year":[{"year":2025,"cited_by_count":1}],"updated_date":"2026-03-27T05:58:40.876381","created_date":"2025-10-10T00:00:00"}
