{"id":"https://openalex.org/W7116372838","doi":"https://doi.org/10.1145/3754598.3754630","title":"HMGraph: Boosting GNN Training on Hierarchical Memory via Coordinated Cache","display_name":"HMGraph: Boosting GNN Training on Hierarchical Memory via Coordinated Cache","publication_year":2025,"publication_date":"2025-09-08","ids":{"openalex":"https://openalex.org/W7116372838","doi":"https://doi.org/10.1145/3754598.3754630"},"language":null,"primary_location":{"id":"doi:10.1145/3754598.3754630","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3754598.3754630","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 54th International Conference on Parallel Processing","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5083964601","display_name":"Lizhi Zhang","orcid":null},"institutions":[{"id":"https://openalex.org/I170215575","display_name":"National University of Defense Technology","ror":"https://ror.org/05d2yfz11","country_code":"CN","type":"education","lineage":["https://openalex.org/I170215575"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Lizhi Zhang","raw_affiliation_strings":["National University of Defense Technology, Changsha, China"],"affiliations":[{"raw_affiliation_string":"National University of Defense Technology, Changsha, China","institution_ids":["https://openalex.org/I170215575"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101632859","display_name":"Menghan Jia","orcid":"https://orcid.org/0000-0002-5639-7882"},"institutions":[{"id":"https://openalex.org/I170215575","display_name":"National University of Defense Technology","ror":"https://ror.org/05d2yfz11","country_code":"CN","type":"education","lineage":["https://openalex.org/I170215575"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Menghan Jia","raw_affiliation_strings":["National University of Defense Technology, Changsha, China"],"affiliations":[{"raw_affiliation_string":"National University of Defense Technology, Changsha, China","institution_ids":["https://openalex.org/I170215575"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5104293176","display_name":"Zhiquan Lai","orcid":null},"institutions":[{"id":"https://openalex.org/I170215575","display_name":"National University of Defense Technology","ror":"https://ror.org/05d2yfz11","country_code":"CN","type":"education","lineage":["https://openalex.org/I170215575"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Zhiquan Lai","raw_affiliation_strings":["National University of Defense Technology, Changsha, China"],"affiliations":[{"raw_affiliation_string":"National University of Defense Technology, Changsha, China","institution_ids":["https://openalex.org/I170215575"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101630702","display_name":"Li Qiao","orcid":"https://orcid.org/0000-0001-8754-8607"},"institutions":[{"id":"https://openalex.org/I191208505","display_name":"Xiamen University","ror":"https://ror.org/00mcjh785","country_code":"CN","type":"education","lineage":["https://openalex.org/I191208505"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Qiao Li","raw_affiliation_strings":["Xiamen University, Xiamen, China"],"affiliations":[{"raw_affiliation_string":"Xiamen University, Xiamen, China","institution_ids":["https://openalex.org/I191208505"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5042229671","display_name":"Yiming Zhang","orcid":"https://orcid.org/0000-0002-9570-8962"},"institutions":[{"id":"https://openalex.org/I191208505","display_name":"Xiamen University","ror":"https://ror.org/00mcjh785","country_code":"CN","type":"education","lineage":["https://openalex.org/I191208505"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yiming Zhang","raw_affiliation_strings":["Xiamen University, Xiamen, China"],"affiliations":[{"raw_affiliation_string":"Xiamen University, Xiamen, China","institution_ids":["https://openalex.org/I191208505"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5100681030","display_name":"Dongsheng Li","orcid":"https://orcid.org/0000-0001-7725-8040"},"institutions":[{"id":"https://openalex.org/I170215575","display_name":"National University of Defense Technology","ror":"https://ror.org/05d2yfz11","country_code":"CN","type":"education","lineage":["https://openalex.org/I170215575"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Dongsheng Li","raw_affiliation_strings":["National University of Defense Technology, Changsha, China"],"affiliations":[{"raw_affiliation_string":"National University of Defense Technology, Changsha, China","institution_ids":["https://openalex.org/I170215575"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5083964601"],"corresponding_institution_ids":["https://openalex.org/I170215575"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.63113128,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"374","last_page":"384"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12292","display_name":"Graph Theory and Algorithms","score":0.8902999758720398,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T12292","display_name":"Graph Theory and Algorithms","score":0.8902999758720398,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.028999999165534973,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12326","display_name":"Network Packet Processing and Optimization","score":0.014499999582767487,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/cache","display_name":"Cache","score":0.6592000126838684},{"id":"https://openalex.org/keywords/cache-only-memory-architecture","display_name":"Cache-only memory architecture","score":0.5555999875068665},{"id":"https://openalex.org/keywords/cache-coloring","display_name":"Cache coloring","score":0.5442000031471252},{"id":"https://openalex.org/keywords/boosting","display_name":"Boosting (machine learning)","score":0.5128999948501587},{"id":"https://openalex.org/keywords/cache-pollution","display_name":"Cache pollution","score":0.4318000078201294},{"id":"https://openalex.org/keywords/uniform-memory-access","display_name":"Uniform memory access","score":0.4268999993801117},{"id":"https://openalex.org/keywords/memory-management","display_name":"Memory management","score":0.4203000068664551},{"id":"https://openalex.org/keywords/memory-map","display_name":"Memory map","score":0.3833000063896179},{"id":"https://openalex.org/keywords/non-uniform-memory-access","display_name":"Non-uniform memory access","score":0.36550000309944153}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8668000102043152},{"id":"https://openalex.org/C115537543","wikidata":"https://www.wikidata.org/wiki/Q165596","display_name":"Cache","level":2,"score":0.6592000126838684},{"id":"https://openalex.org/C3720319","wikidata":"https://www.wikidata.org/wiki/Q5015937","display_name":"Cache-only memory architecture","level":5,"score":0.5555999875068665},{"id":"https://openalex.org/C201148951","wikidata":"https://www.wikidata.org/wiki/Q5015976","display_name":"Cache coloring","level":4,"score":0.5442000031471252},{"id":"https://openalex.org/C46686674","wikidata":"https://www.wikidata.org/wiki/Q466303","display_name":"Boosting (machine learning)","level":2,"score":0.5128999948501587},{"id":"https://openalex.org/C113166858","wikidata":"https://www.wikidata.org/wiki/Q5015981","display_name":"Cache pollution","level":5,"score":0.4318000078201294},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.4296000003814697},{"id":"https://openalex.org/C51290061","wikidata":"https://www.wikidata.org/wiki/Q1936765","display_name":"Uniform memory access","level":4,"score":0.4268999993801117},{"id":"https://openalex.org/C176649486","wikidata":"https://www.wikidata.org/wiki/Q2308807","display_name":"Memory management","level":3,"score":0.4203000068664551},{"id":"https://openalex.org/C120314980","wikidata":"https://www.wikidata.org/wiki/Q180634","display_name":"Distributed computing","level":1,"score":0.39890000224113464},{"id":"https://openalex.org/C74426580","wikidata":"https://www.wikidata.org/wiki/Q719484","display_name":"Memory map","level":3,"score":0.3833000063896179},{"id":"https://openalex.org/C133371097","wikidata":"https://www.wikidata.org/wiki/Q868014","display_name":"Non-uniform memory access","level":5,"score":0.36550000309944153},{"id":"https://openalex.org/C57863822","wikidata":"https://www.wikidata.org/wiki/Q905488","display_name":"Flat memory model","level":4,"score":0.3540000021457672},{"id":"https://openalex.org/C132525143","wikidata":"https://www.wikidata.org/wiki/Q141488","display_name":"Graph","level":2,"score":0.35190001130104065},{"id":"https://openalex.org/C189783530","wikidata":"https://www.wikidata.org/wiki/Q352090","display_name":"CPU cache","level":3,"score":0.3434999883174896},{"id":"https://openalex.org/C47487241","wikidata":"https://www.wikidata.org/wiki/Q5227230","display_name":"Data access","level":2,"score":0.33899998664855957},{"id":"https://openalex.org/C38556500","wikidata":"https://www.wikidata.org/wiki/Q13404475","display_name":"Cache algorithms","level":4,"score":0.33629998564720154},{"id":"https://openalex.org/C78766204","wikidata":"https://www.wikidata.org/wiki/Q555032","display_name":"Multi-core processor","level":2,"score":0.33550000190734863},{"id":"https://openalex.org/C41036726","wikidata":"https://www.wikidata.org/wiki/Q844824","display_name":"Physical address","level":3,"score":0.33500000834465027},{"id":"https://openalex.org/C59687516","wikidata":"https://www.wikidata.org/wiki/Q5015938","display_name":"Cache-oblivious algorithm","level":5,"score":0.3246000111103058},{"id":"https://openalex.org/C63511323","wikidata":"https://www.wikidata.org/wiki/Q908936","display_name":"Interleaved memory","level":4,"score":0.2912999987602234},{"id":"https://openalex.org/C162319229","wikidata":"https://www.wikidata.org/wiki/Q175263","display_name":"Data structure","level":2,"score":0.2870999872684479},{"id":"https://openalex.org/C118524514","wikidata":"https://www.wikidata.org/wiki/Q173212","display_name":"Computer architecture","level":1,"score":0.28139999508857727},{"id":"https://openalex.org/C5165142","wikidata":"https://www.wikidata.org/wiki/Q5432732","display_name":"False sharing","level":5,"score":0.27630001306533813},{"id":"https://openalex.org/C1668388","wikidata":"https://www.wikidata.org/wiki/Q1149776","display_name":"Data management","level":2,"score":0.2639999985694885},{"id":"https://openalex.org/C2779602883","wikidata":"https://www.wikidata.org/wiki/Q15544750","display_name":"Memory architecture","level":2,"score":0.2624000012874603},{"id":"https://openalex.org/C133875982","wikidata":"https://www.wikidata.org/wiki/Q764810","display_name":"Shared memory","level":2,"score":0.2605000138282776},{"id":"https://openalex.org/C144240696","wikidata":"https://www.wikidata.org/wiki/Q367204","display_name":"Address space","level":2,"score":0.2587999999523163}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3754598.3754630","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3754598.3754630","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 54th International Conference on Parallel Processing","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":21,"referenced_works":["https://openalex.org/W2000042664","https://openalex.org/W2807021761","https://openalex.org/W2907492528","https://openalex.org/W2945827377","https://openalex.org/W2963406064","https://openalex.org/W3002924435","https://openalex.org/W3096566397","https://openalex.org/W3198239267","https://openalex.org/W3207203625","https://openalex.org/W4220807331","https://openalex.org/W4224247286","https://openalex.org/W4292718518","https://openalex.org/W4318541614","https://openalex.org/W4321466207","https://openalex.org/W4372267520","https://openalex.org/W4381328689","https://openalex.org/W4381832157","https://openalex.org/W4385819934","https://openalex.org/W4395106434","https://openalex.org/W4396601595","https://openalex.org/W4401408756"],"related_works":[],"abstract_inverted_index":{"The":[0,49],"GPU-CPU-SSD":[1],"hierarchical":[2,46,69,136],"memory":[3,18,22,47,59,62,70,80],"systems":[4,162],"are":[5,88],"commonly":[6],"employed":[7],"for":[8,67],"large-scale":[9,153],"GNN":[10,38,74],"training.":[11],"However,":[12],"existing":[13],"solutions":[14],"inefficiently":[15],"utilize":[16],"high-bandwidth":[17,147],"due":[19],"to":[20,109,125],"coarse-grained":[21],"management":[23,96,123],"and":[24,60,72,79],"poor":[25],"data":[26,78,106,121,138],"placement":[27],"that":[28,98,141,156],"ignores":[29],"graph":[30,105],"access":[31,107],"patterns.":[32],"This":[33],"paper":[34],"presents":[35],"HMGraph,":[36],"a":[37,64,119,135],"training":[39],"system":[40,71],"unleashing":[41],"the":[42,68,104,111,127,144],"full":[43],"potential":[44],"of":[45,52,129,146,151],"architectures.":[48],"core":[50],"design":[51,92],"HMGraph":[53,157],"is":[54],"Coordinated":[55],"Cache,":[56],"integrating":[57],"GPU":[58],"CPU":[61],"as":[63],"cache":[65,95,100,113],"layer":[66],"improving":[73],"efficiency":[75,128],"through":[76],"fine-grained":[77],"management.":[81],"For":[82],"this":[83],"goal,":[84],"three":[85],"main":[86],"designs":[87],"proposed.":[89],"First,":[90],"we":[91,117,133],"an":[93],"automatic":[94],"mechanism":[97],"optimizes":[99],"allocation":[101],"based":[102],"on":[103],"pattern":[108],"enhance":[110],"overall":[112],"hit":[114],"rate.":[115],"Second,":[116],"propose":[118],"dynamic":[120,130],"space":[122],"strategy":[124,140],"improve":[126],"cache.":[131],"Third,":[132],"develop":[134],"memory-aware":[137],"partitioning":[139],"further":[142],"improves":[143],"utilization":[145],"memory.":[148],"Our":[149],"evaluation":[150],"various":[152],"graphs":[154],"reveals":[155],"significantly":[158],"outperforms":[159],"other":[160],"state-of-the-art":[161],"by":[163],"1.4-36.7":[164],"\u00d7.":[165]},"counts_by_year":[],"updated_date":"2025-12-21T02:06:08.432651","created_date":"2025-12-21T00:00:00"}
