{"id":"https://openalex.org/W7116427620","doi":"https://doi.org/10.1145/3754598.3754664","title":"Scaling Distributed Graph Processing to Hundreds of GPUs","display_name":"Scaling Distributed Graph Processing to Hundreds of GPUs","publication_year":2025,"publication_date":"2025-09-08","ids":{"openalex":"https://openalex.org/W7116427620","doi":"https://doi.org/10.1145/3754598.3754664"},"language":null,"primary_location":{"id":"doi:10.1145/3754598.3754664","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3754598.3754664","pdf_url":null,"source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 54th International Conference on Parallel Processing","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://doi.org/10.1145/3754598.3754664","any_repository_has_fulltext":null},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5038499316","display_name":"George M. Slota","orcid":"https://orcid.org/0000-0002-4709-4724"},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"George Slota","raw_affiliation_strings":["RPI, Troy, NY, USA"],"raw_orcid":"https://orcid.org/0000-0002-4709-4724","affiliations":[{"raw_affiliation_string":"RPI, Troy, NY, USA","institution_ids":[]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5015590605","display_name":"Michael Mandulak","orcid":"https://orcid.org/0009-0002-6656-6237"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Michael Mandulak","raw_affiliation_strings":["RPI, Troy, NY, USA"],"raw_orcid":"https://orcid.org/0009-0002-6656-6237","affiliations":[{"raw_affiliation_string":"RPI, Troy, NY, USA","institution_ids":[]}]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5038499316"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.60410619,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"228","last_page":"237"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12292","display_name":"Graph Theory and Algorithms","score":0.9763000011444092,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T12292","display_name":"Graph Theory and Algorithms","score":0.9763000011444092,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11273","display_name":"Advanced Graph Neural Networks","score":0.007300000172108412,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10720","display_name":"Complexity and Algorithms in Graphs","score":0.00139999995008111,"subfield":{"id":"https://openalex.org/subfields/1703","display_name":"Computational Theory and Mathematics"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/scalability","display_name":"Scalability","score":0.7010999917984009},{"id":"https://openalex.org/keywords/computation","display_name":"Computation","score":0.4819999933242798},{"id":"https://openalex.org/keywords/graph","display_name":"Graph","score":0.47850000858306885},{"id":"https://openalex.org/keywords/scaling","display_name":"Scaling","score":0.4754999876022339},{"id":"https://openalex.org/keywords/graph-partition","display_name":"Graph partition","score":0.4643999934196472},{"id":"https://openalex.org/keywords/benchmark","display_name":"Benchmark (surveying)","score":0.4090999960899353},{"id":"https://openalex.org/keywords/graph-algorithms","display_name":"Graph algorithms","score":0.3662000000476837}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.810699999332428},{"id":"https://openalex.org/C48044578","wikidata":"https://www.wikidata.org/wiki/Q727490","display_name":"Scalability","level":2,"score":0.7010999917984009},{"id":"https://openalex.org/C80444323","wikidata":"https://www.wikidata.org/wiki/Q2878974","display_name":"Theoretical computer science","level":1,"score":0.5515999794006348},{"id":"https://openalex.org/C45374587","wikidata":"https://www.wikidata.org/wiki/Q12525525","display_name":"Computation","level":2,"score":0.4819999933242798},{"id":"https://openalex.org/C132525143","wikidata":"https://www.wikidata.org/wiki/Q141488","display_name":"Graph","level":2,"score":0.47850000858306885},{"id":"https://openalex.org/C99844830","wikidata":"https://www.wikidata.org/wiki/Q102441924","display_name":"Scaling","level":2,"score":0.4754999876022339},{"id":"https://openalex.org/C48903430","wikidata":"https://www.wikidata.org/wiki/Q491370","display_name":"Graph partition","level":3,"score":0.4643999934196472},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.4090999960899353},{"id":"https://openalex.org/C2986651925","wikidata":"https://www.wikidata.org/wiki/Q1514868","display_name":"Graph algorithms","level":3,"score":0.3662000000476837},{"id":"https://openalex.org/C102379954","wikidata":"https://www.wikidata.org/wiki/Q2589940","display_name":"Call graph","level":2,"score":0.34779998660087585},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.3447999954223633},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.322299987077713},{"id":"https://openalex.org/C120314980","wikidata":"https://www.wikidata.org/wiki/Q180634","display_name":"Distributed computing","level":1,"score":0.287200003862381},{"id":"https://openalex.org/C2982832238","wikidata":"https://www.wikidata.org/wiki/Q5531640","display_name":"General purpose","level":2,"score":0.2831000089645386},{"id":"https://openalex.org/C93996380","wikidata":"https://www.wikidata.org/wiki/Q44127","display_name":"Server","level":2,"score":0.2809000015258789},{"id":"https://openalex.org/C56372850","wikidata":"https://www.wikidata.org/wiki/Q1050404","display_name":"Sparse matrix","level":3,"score":0.27630001306533813},{"id":"https://openalex.org/C80899671","wikidata":"https://www.wikidata.org/wiki/Q1304193","display_name":"Vertex (graph theory)","level":3,"score":0.2637999951839447},{"id":"https://openalex.org/C2778456923","wikidata":"https://www.wikidata.org/wiki/Q5337692","display_name":"Edge computing","level":3,"score":0.25839999318122864}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3754598.3754664","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3754598.3754664","pdf_url":null,"source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 54th International Conference on Parallel Processing","raw_type":"proceedings-article"}],"best_oa_location":{"id":"doi:10.1145/3754598.3754664","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3754598.3754664","pdf_url":null,"source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 54th International Conference on Parallel Processing","raw_type":"proceedings-article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":19,"referenced_works":["https://openalex.org/W1485041102","https://openalex.org/W1578199244","https://openalex.org/W2062140606","https://openalex.org/W2084991847","https://openalex.org/W2170616854","https://openalex.org/W2296407087","https://openalex.org/W2480409449","https://openalex.org/W2718955078","https://openalex.org/W2963794747","https://openalex.org/W2964357930","https://openalex.org/W2986726791","https://openalex.org/W2987753956","https://openalex.org/W2998104365","https://openalex.org/W3006582303","https://openalex.org/W3043803354","https://openalex.org/W4235810972","https://openalex.org/W4253426709","https://openalex.org/W4391096432","https://openalex.org/W4391986935"],"related_works":[],"abstract_inverted_index":{"This":[0],"paper":[1],"presents":[2],"HPCGraph-GPU,":[3],"methods":[4,18],"for":[5,9,21,67,115],"optimized":[6],"2D":[7,17,48,116],"communications":[8],"general":[10,37],"graph":[11,54,88],"computations":[12],"on":[13,28,137,165,171],"hundreds":[14],"GPUs.":[15],"While":[16],"have":[19,26],"existed":[20],"decades,":[22],"most":[23],"prior":[24,124],"efforts":[25,105,128],"focused":[27],"specialized":[29],"benchmarks":[30],"such":[31],"as":[32],"breadth-first":[33],"search":[34],"(BFS)":[35],"or":[36],"processing":[38],"at":[39],"a":[40,82],"modest":[41],"scale.":[42],"We":[43],"extend":[44],"the":[45,75,85,107,121,131,138,143],"usage":[46],"of":[47,77,84,110,123],"distributions":[49],"to":[50],"arbitrary":[51],"and":[52,58,65,71,101,112],"massive-scale":[53],"computations,":[55],"developing":[56],"lightweight":[57],"sparse":[59],"communication":[60],"patterns,":[61],"active":[62],"vertex":[63],"queues,":[64],"approaches":[66],"more":[68,92],"complex":[69,93],"reductions":[70],"communications.":[72],"To":[73],"demonstrate":[74],"efficacy":[76],"our":[78],"approach,":[79],"we":[80,155],"implement":[81],"handful":[83],"standard":[86],"benchmark":[87],"algorithms":[89],"along":[90],"with":[91],"routines,":[94],"including":[95],"label":[96],"propagation,":[97],"maximum":[98],"weight":[99],"matching,":[100],"pointer":[102],"jumping.":[103],"Our":[104],"approach":[106],"theoretical":[108],"limits":[109],"strong":[111],"weak":[113],"scaling":[114],"methods,":[117],"while":[118],"greatly":[119],"outperforming":[120],"scalability":[122],"related":[125],"work.":[126],"These":[127],"also":[129],"offer":[130],"first":[132],"generalized":[133],"multi-GPU":[134],"performance":[135,157],"results":[136],"largest":[139],"publicly":[140],"available":[141],"dataset,":[142],"128":[144],"billion":[145,160],"edge":[146],"2012":[147],"Web":[148],"Data":[149],"Commons":[150],"crawl.":[151],"On":[152],"this":[153],"input,":[154],"observe":[156],"from":[158],"26-123":[159],"edges":[161],"processed":[162],"per":[163],"second":[164],"400":[166],"\u00d7":[167],"V100":[168],"GPUs,":[169],"depending":[170],"algorithm":[172],"complexity.":[173]},"counts_by_year":[],"updated_date":"2025-12-21T02:06:08.432651","created_date":"2025-12-21T00:00:00"}
