{"id":"https://openalex.org/W4416199157","doi":"https://doi.org/10.1145/3712285.3759788","title":"Moment: Co-optimizing Physical Communication Topology and Data Placement for Multi-GPU Out-of-core GNN Training","display_name":"Moment: Co-optimizing Physical Communication Topology and Data Placement for Multi-GPU Out-of-core GNN Training","publication_year":2025,"publication_date":"2025-11-12","ids":{"openalex":"https://openalex.org/W4416199157","doi":"https://doi.org/10.1145/3712285.3759788"},"language":null,"primary_location":{"id":"doi:10.1145/3712285.3759788","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3712285.3759788","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the International Conference for High Performance Computing, Networking, Storage and Analysis","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5102517564","display_name":"Zuocheng Shi","orcid":"https://orcid.org/0009-0004-7835-6522"},"institutions":[{"id":"https://openalex.org/I76130692","display_name":"Zhejiang University","ror":"https://ror.org/00a2xv884","country_code":"CN","type":"education","lineage":["https://openalex.org/I76130692"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Zuocheng Shi","raw_affiliation_strings":["Zhejiang University, Hangzhou, China"],"affiliations":[{"raw_affiliation_string":"Zhejiang University, Hangzhou, China","institution_ids":["https://openalex.org/I76130692"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5102004274","display_name":"Jie Sun","orcid":"https://orcid.org/0000-0001-7030-0146"},"institutions":[{"id":"https://openalex.org/I76130692","display_name":"Zhejiang University","ror":"https://ror.org/00a2xv884","country_code":"CN","type":"education","lineage":["https://openalex.org/I76130692"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jie Sun","raw_affiliation_strings":["Zhejiang University, Hangzhou, China"],"affiliations":[{"raw_affiliation_string":"Zhejiang University, Hangzhou, China","institution_ids":["https://openalex.org/I76130692"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5110126998","display_name":"Z.H. Song","orcid":"https://orcid.org/0009-0003-4832-9120"},"institutions":[{"id":"https://openalex.org/I76130692","display_name":"Zhejiang University","ror":"https://ror.org/00a2xv884","country_code":"CN","type":"education","lineage":["https://openalex.org/I76130692"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Ziyu Song","raw_affiliation_strings":["Zhejiang University, Hangzhou, China"],"affiliations":[{"raw_affiliation_string":"Zhejiang University, Hangzhou, China","institution_ids":["https://openalex.org/I76130692"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101448250","display_name":"Mo Sun","orcid":"https://orcid.org/0009-0003-0605-8748"},"institutions":[{"id":"https://openalex.org/I76130692","display_name":"Zhejiang University","ror":"https://ror.org/00a2xv884","country_code":"CN","type":"education","lineage":["https://openalex.org/I76130692"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Mo Sun","raw_affiliation_strings":["Zhejiang University, Hangzhou, China"],"affiliations":[{"raw_affiliation_string":"Zhejiang University, Hangzhou, China","institution_ids":["https://openalex.org/I76130692"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5110378446","display_name":"Yang Xiao","orcid":"https://orcid.org/0009-0005-1416-6065"},"institutions":[{"id":"https://openalex.org/I76130692","display_name":"Zhejiang University","ror":"https://ror.org/00a2xv884","country_code":"CN","type":"education","lineage":["https://openalex.org/I76130692"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yang Xiao","raw_affiliation_strings":["Zhejiang University, Hangzhou, China"],"affiliations":[{"raw_affiliation_string":"Zhejiang University, Hangzhou, China","institution_ids":["https://openalex.org/I76130692"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5004882141","display_name":"Fei Wu","orcid":"https://orcid.org/0000-0003-2139-8807"},"institutions":[{"id":"https://openalex.org/I76130692","display_name":"Zhejiang University","ror":"https://ror.org/00a2xv884","country_code":"CN","type":"education","lineage":["https://openalex.org/I76130692"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Fei Wu","raw_affiliation_strings":["Zhejiang University, Hangzhou, China"],"affiliations":[{"raw_affiliation_string":"Zhejiang University, Hangzhou, China","institution_ids":["https://openalex.org/I76130692"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5004837462","display_name":"Zeke Wang","orcid":"https://orcid.org/0000-0001-8550-9241"},"institutions":[{"id":"https://openalex.org/I76130692","display_name":"Zhejiang University","ror":"https://ror.org/00a2xv884","country_code":"CN","type":"education","lineage":["https://openalex.org/I76130692"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Zeke Wang","raw_affiliation_strings":["Zhejiang University, Hangzhou, China"],"affiliations":[{"raw_affiliation_string":"Zhejiang University, Hangzhou, China","institution_ids":["https://openalex.org/I76130692"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":7,"corresponding_author_ids":["https://openalex.org/A5102517564"],"corresponding_institution_ids":["https://openalex.org/I76130692"],"apc_list":null,"apc_paid":null,"fwci":1.428,"has_fulltext":false,"cited_by_count":1,"citation_normalized_percentile":{"value":0.87214201,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":91,"max":95},"biblio":{"volume":null,"issue":null,"first_page":"250","last_page":"264"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12292","display_name":"Graph Theory and Algorithms","score":0.5760999917984009,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T12292","display_name":"Graph Theory and Algorithms","score":0.5760999917984009,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11273","display_name":"Advanced Graph Neural Networks","score":0.1639000028371811,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.06669999659061432,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/scheduling","display_name":"Scheduling (production processes)","score":0.5771999955177307},{"id":"https://openalex.org/keywords/communications-system","display_name":"Communications system","score":0.46970000863075256},{"id":"https://openalex.org/keywords/network-topology","display_name":"Network topology","score":0.4580000042915344},{"id":"https://openalex.org/keywords/logical-topology","display_name":"Logical topology","score":0.42340001463890076},{"id":"https://openalex.org/keywords/moment","display_name":"Moment (physics)","score":0.4072999954223633},{"id":"https://openalex.org/keywords/graph","display_name":"Graph","score":0.3917999863624573},{"id":"https://openalex.org/keywords/topology","display_name":"Topology (electrical circuits)","score":0.3783000111579895},{"id":"https://openalex.org/keywords/telecommunications-network","display_name":"Telecommunications network","score":0.37439998984336853}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7924000024795532},{"id":"https://openalex.org/C206729178","wikidata":"https://www.wikidata.org/wiki/Q2271896","display_name":"Scheduling (production processes)","level":2,"score":0.5771999955177307},{"id":"https://openalex.org/C120314980","wikidata":"https://www.wikidata.org/wiki/Q180634","display_name":"Distributed computing","level":1,"score":0.546999990940094},{"id":"https://openalex.org/C101765175","wikidata":"https://www.wikidata.org/wiki/Q577764","display_name":"Communications system","level":2,"score":0.46970000863075256},{"id":"https://openalex.org/C199845137","wikidata":"https://www.wikidata.org/wiki/Q145490","display_name":"Network topology","level":2,"score":0.4580000042915344},{"id":"https://openalex.org/C117729477","wikidata":"https://www.wikidata.org/wiki/Q145490","display_name":"Logical topology","level":3,"score":0.42340001463890076},{"id":"https://openalex.org/C179254644","wikidata":"https://www.wikidata.org/wiki/Q13222844","display_name":"Moment (physics)","level":2,"score":0.4072999954223633},{"id":"https://openalex.org/C132525143","wikidata":"https://www.wikidata.org/wiki/Q141488","display_name":"Graph","level":2,"score":0.3917999863624573},{"id":"https://openalex.org/C184720557","wikidata":"https://www.wikidata.org/wiki/Q7825049","display_name":"Topology (electrical circuits)","level":2,"score":0.3783000111579895},{"id":"https://openalex.org/C192126672","wikidata":"https://www.wikidata.org/wiki/Q1068715","display_name":"Telecommunications network","level":2,"score":0.37439998984336853},{"id":"https://openalex.org/C113138325","wikidata":"https://www.wikidata.org/wiki/Q864457","display_name":"Knapsack problem","level":2,"score":0.37380000948905945},{"id":"https://openalex.org/C12269588","wikidata":"https://www.wikidata.org/wiki/Q132364","display_name":"Communications protocol","level":2,"score":0.3718000054359436},{"id":"https://openalex.org/C31258907","wikidata":"https://www.wikidata.org/wiki/Q1301371","display_name":"Computer network","level":1,"score":0.3671000003814697},{"id":"https://openalex.org/C2777211547","wikidata":"https://www.wikidata.org/wiki/Q17141490","display_name":"Training (meteorology)","level":2,"score":0.3578000068664551},{"id":"https://openalex.org/C51632099","wikidata":"https://www.wikidata.org/wiki/Q3985153","display_name":"Training set","level":2,"score":0.35670000314712524},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.34369999170303345},{"id":"https://openalex.org/C162319229","wikidata":"https://www.wikidata.org/wiki/Q175263","display_name":"Data structure","level":2,"score":0.33480000495910645},{"id":"https://openalex.org/C157764524","wikidata":"https://www.wikidata.org/wiki/Q1383412","display_name":"Throughput","level":3,"score":0.2953999936580658},{"id":"https://openalex.org/C179768478","wikidata":"https://www.wikidata.org/wiki/Q1120057","display_name":"Cyber-physical system","level":2,"score":0.28540000319480896},{"id":"https://openalex.org/C93996380","wikidata":"https://www.wikidata.org/wiki/Q44127","display_name":"Server","level":2,"score":0.27810001373291016},{"id":"https://openalex.org/C2779304628","wikidata":"https://www.wikidata.org/wiki/Q3503480","display_name":"Face (sociological concept)","level":2,"score":0.27129998803138733},{"id":"https://openalex.org/C176649486","wikidata":"https://www.wikidata.org/wiki/Q2308807","display_name":"Memory management","level":3,"score":0.27079999446868896},{"id":"https://openalex.org/C80444323","wikidata":"https://www.wikidata.org/wiki/Q2878974","display_name":"Theoretical computer science","level":1,"score":0.26649999618530273},{"id":"https://openalex.org/C88230418","wikidata":"https://www.wikidata.org/wiki/Q131476","display_name":"Graph theory","level":2,"score":0.25519999861717224}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3712285.3759788","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3712285.3759788","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the International Conference for High Performance Computing, Networking, Storage and Analysis","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":46,"referenced_works":["https://openalex.org/W1994727615","https://openalex.org/W2000273502","https://openalex.org/W2512971201","https://openalex.org/W2807021761","https://openalex.org/W2897862648","https://openalex.org/W2899457523","https://openalex.org/W2945827377","https://openalex.org/W2945827670","https://openalex.org/W2963601856","https://openalex.org/W2970929262","https://openalex.org/W2984239289","https://openalex.org/W2997128522","https://openalex.org/W3009901425","https://openalex.org/W3045200674","https://openalex.org/W3093133157","https://openalex.org/W3096566397","https://openalex.org/W3133537878","https://openalex.org/W3159953606","https://openalex.org/W3172127112","https://openalex.org/W4220807331","https://openalex.org/W4281401915","https://openalex.org/W4281720073","https://openalex.org/W4281725510","https://openalex.org/W4281752694","https://openalex.org/W4283314525","https://openalex.org/W4290927864","https://openalex.org/W4290944486","https://openalex.org/W4292718518","https://openalex.org/W4296720501","https://openalex.org/W4318541520","https://openalex.org/W4318541614","https://openalex.org/W4321448346","https://openalex.org/W4321636549","https://openalex.org/W4372267520","https://openalex.org/W4381832157","https://openalex.org/W4383749405","https://openalex.org/W4385763851","https://openalex.org/W4387321131","https://openalex.org/W4395117348","https://openalex.org/W4401857649","https://openalex.org/W4402042236","https://openalex.org/W4405754806","https://openalex.org/W4408029667","https://openalex.org/W4413349858","https://openalex.org/W4413360788","https://openalex.org/W4413361173"],"related_works":[],"abstract_inverted_index":{"Graph":[0],"Neural":[1],"Networks":[2],"(GNNs)":[3],"are":[4,49],"widely":[5],"employed":[6],"in":[7,75],"applications":[8],"like":[9],"recommendation":[10],"systems,":[11],"social":[12],"network":[13],"analysis,":[14],"and":[15,36,64,71,84,98,130],"fraud":[16],"detection,":[17],"but":[18],"training":[19,74],"large-scale":[20],"GNNs":[21],"is":[22],"challenging":[23],"due":[24],"to":[25,68,127,135],"its":[26],"memory":[27,43],"limitations.":[28],"Existing":[29],"systems":[30,40,48,124,132],"face":[31],"a":[32,60,76,103,109],"trade-off":[33],"between":[34],"throughput":[35],"monetary":[37,141],"cost:":[38],"Distributed":[39],"require":[41],"expensive":[42],"scaling,":[44],"while":[45],"single-machine":[46],"out-of-core":[47,123],"limited":[50],"by":[51,89,125,133],"GPU/PCIe":[52],"throughput.":[53],"To":[54],"this":[55],"end,":[56],"we":[57],"propose":[58],"Moment,":[59],"physical":[61,92],"communication":[62,82,100],"topology":[63,93],"data":[65,115],"placement":[66],"co-optimizer":[67],"enable":[69],"high-throughput":[70],"low-cost":[72],"GNN":[73],"single":[77],"multi-GPU":[78],"machine.":[79],"Moment":[80,121],"addresses":[81],"contention":[83],"GPU":[85],"load":[86],"imbalance":[87],"issues":[88],"modeling":[90],"the":[91],"as":[94,102],"capacity-constrained":[95],"directed":[96],"graphs":[97],"formulating":[99],"scheduling":[101],"max-flow":[104],"problem.":[105],"It":[106],"also":[107],"introduces":[108],"data-distribution-aware":[110],"knapsack":[111],"algorithm":[112],"for":[113],"optimized":[114],"placement.":[116],"Experimental":[117],"results":[118],"show":[119],"that":[120],"outperforms":[122],"up":[126,134],"6.51":[128],"\u00d7":[129],"distributed":[131],"3.02":[136],"\u00d7,":[137],"with":[138],"only":[139],"50%":[140],"cost.":[142]},"counts_by_year":[{"year":2025,"cited_by_count":1}],"updated_date":"2026-03-07T16:01:11.037858","created_date":"2025-11-12T00:00:00"}
