{"id":"https://openalex.org/W4416004258","doi":"https://doi.org/10.1145/3731599.3767392","title":"Accelerating Intra-Node GPU Communication: A Performance Model for Multi-Path Transfers","display_name":"Accelerating Intra-Node GPU Communication: A Performance Model for Multi-Path Transfers","publication_year":2025,"publication_date":"2025-11-07","ids":{"openalex":"https://openalex.org/W4416004258","doi":"https://doi.org/10.1145/3731599.3767392"},"language":null,"primary_location":{"id":"doi:10.1145/3731599.3767392","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3731599.3767392","pdf_url":null,"source":null,"license":"cc-by-sa","license_id":"https://openalex.org/licenses/cc-by-sa","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the SC '25 Workshops of the International Conference for High Performance Computing, Networking, Storage and Analysis","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://doi.org/10.1145/3731599.3767392","any_repository_has_fulltext":null},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5031362631","display_name":"Amirhossein Sojoodi","orcid":"https://orcid.org/0000-0001-9877-3201"},"institutions":[{"id":"https://openalex.org/I204722609","display_name":"Queen's University","ror":"https://ror.org/02y72wh86","country_code":"CA","type":"education","lineage":["https://openalex.org/I204722609"]}],"countries":["CA"],"is_corresponding":true,"raw_author_name":"Amirhossein Sojoodi","raw_affiliation_strings":["Queens University, Kingston, Ontario, Canada"],"affiliations":[{"raw_affiliation_string":"Queens University, Kingston, Ontario, Canada","institution_ids":["https://openalex.org/I204722609"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5103539740","display_name":"Mohammad K. Akbari","orcid":"https://orcid.org/0009-0008-3041-629X"},"institutions":[{"id":"https://openalex.org/I204722609","display_name":"Queen's University","ror":"https://ror.org/02y72wh86","country_code":"CA","type":"education","lineage":["https://openalex.org/I204722609"]}],"countries":["CA"],"is_corresponding":false,"raw_author_name":"Mohammad Akbari","raw_affiliation_strings":["Queen's University, Kingston, Ontario, Canada"],"affiliations":[{"raw_affiliation_string":"Queen's University, Kingston, Ontario, Canada","institution_ids":["https://openalex.org/I204722609"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5114553985","display_name":"Hamed Sharifian","orcid":"https://orcid.org/0009-0002-2275-3313"},"institutions":[{"id":"https://openalex.org/I204722609","display_name":"Queen's University","ror":"https://ror.org/02y72wh86","country_code":"CA","type":"education","lineage":["https://openalex.org/I204722609"]}],"countries":["CA"],"is_corresponding":false,"raw_author_name":"Hamed Sharifian","raw_affiliation_strings":["Queen's University, Kingston, Ontario, Canada"],"affiliations":[{"raw_affiliation_string":"Queen's University, Kingston, Ontario, Canada","institution_ids":["https://openalex.org/I204722609"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5119281787","display_name":"Ali Farazdaghi","orcid":"https://orcid.org/0009-0006-7112-5080"},"institutions":[{"id":"https://openalex.org/I204722609","display_name":"Queen's University","ror":"https://ror.org/02y72wh86","country_code":"CA","type":"education","lineage":["https://openalex.org/I204722609"]}],"countries":["CA"],"is_corresponding":false,"raw_author_name":"Ali Farazdaghi","raw_affiliation_strings":["Queen's University, Kingston, Ontario, Canada"],"affiliations":[{"raw_affiliation_string":"Queen's University, Kingston, Ontario, Canada","institution_ids":["https://openalex.org/I204722609"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5009842811","display_name":"Ryan E. Grant","orcid":"https://orcid.org/0000-0002-0163-3892"},"institutions":[{"id":"https://openalex.org/I204722609","display_name":"Queen's University","ror":"https://ror.org/02y72wh86","country_code":"CA","type":"education","lineage":["https://openalex.org/I204722609"]}],"countries":["CA"],"is_corresponding":false,"raw_author_name":"Ryan E. Grant","raw_affiliation_strings":["Queen's University, Kingston, Ontario, Canada"],"affiliations":[{"raw_affiliation_string":"Queen's University, Kingston, Ontario, Canada","institution_ids":["https://openalex.org/I204722609"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5039854005","display_name":"Ahmad Afsahi","orcid":"https://orcid.org/0000-0002-2924-6851"},"institutions":[{"id":"https://openalex.org/I204722609","display_name":"Queen's University","ror":"https://ror.org/02y72wh86","country_code":"CA","type":"education","lineage":["https://openalex.org/I204722609"]}],"countries":["CA"],"is_corresponding":false,"raw_author_name":"Ahmad Afsahi","raw_affiliation_strings":["Queen's University, Kingston, Ontario, Canada"],"affiliations":[{"raw_affiliation_string":"Queen's University, Kingston, Ontario, Canada","institution_ids":["https://openalex.org/I204722609"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5031362631"],"corresponding_institution_ids":["https://openalex.org/I204722609"],"apc_list":null,"apc_paid":null,"fwci":2.2703,"has_fulltext":false,"cited_by_count":1,"citation_normalized_percentile":{"value":0.89837229,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":91,"max":95},"biblio":{"volume":null,"issue":null,"first_page":"449","last_page":"460"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.7876999974250793,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.7876999974250793,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10829","display_name":"Interconnection Networks and Systems","score":0.12710000574588776,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12326","display_name":"Network Packet Processing and Optimization","score":0.011099999770522118,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/pipeline","display_name":"Pipeline (software)","score":0.6848000288009644},{"id":"https://openalex.org/keywords/overhead","display_name":"Overhead (engineering)","score":0.6392999887466431},{"id":"https://openalex.org/keywords/key","display_name":"Key (lock)","score":0.6288999915122986},{"id":"https://openalex.org/keywords/path","display_name":"Path (computing)","score":0.475600004196167},{"id":"https://openalex.org/keywords/models-of-communication","display_name":"Models of communication","score":0.44519999623298645},{"id":"https://openalex.org/keywords/communications-system","display_name":"Communications system","score":0.3910999894142151},{"id":"https://openalex.org/keywords/reliability","display_name":"Reliability (semiconductor)","score":0.34709998965263367}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7971000075340271},{"id":"https://openalex.org/C43521106","wikidata":"https://www.wikidata.org/wiki/Q2165493","display_name":"Pipeline (software)","level":2,"score":0.6848000288009644},{"id":"https://openalex.org/C2779960059","wikidata":"https://www.wikidata.org/wiki/Q7113681","display_name":"Overhead (engineering)","level":2,"score":0.6392999887466431},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.6288999915122986},{"id":"https://openalex.org/C120314980","wikidata":"https://www.wikidata.org/wiki/Q180634","display_name":"Distributed computing","level":1,"score":0.5306000113487244},{"id":"https://openalex.org/C2777735758","wikidata":"https://www.wikidata.org/wiki/Q817765","display_name":"Path (computing)","level":2,"score":0.475600004196167},{"id":"https://openalex.org/C158156997","wikidata":"https://www.wikidata.org/wiki/Q1416645","display_name":"Models of communication","level":2,"score":0.44519999623298645},{"id":"https://openalex.org/C101765175","wikidata":"https://www.wikidata.org/wiki/Q577764","display_name":"Communications system","level":2,"score":0.3910999894142151},{"id":"https://openalex.org/C79403827","wikidata":"https://www.wikidata.org/wiki/Q3988","display_name":"Real-time computing","level":1,"score":0.3783000111579895},{"id":"https://openalex.org/C43214815","wikidata":"https://www.wikidata.org/wiki/Q7310987","display_name":"Reliability (semiconductor)","level":3,"score":0.34709998965263367},{"id":"https://openalex.org/C175309249","wikidata":"https://www.wikidata.org/wiki/Q725864","display_name":"Pipeline transport","level":2,"score":0.33009999990463257},{"id":"https://openalex.org/C557945733","wikidata":"https://www.wikidata.org/wiki/Q389772","display_name":"Data transmission","level":2,"score":0.31139999628067017},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.2955000102519989},{"id":"https://openalex.org/C192126672","wikidata":"https://www.wikidata.org/wiki/Q1068715","display_name":"Telecommunications network","level":2,"score":0.2896000146865845},{"id":"https://openalex.org/C115874739","wikidata":"https://www.wikidata.org/wiki/Q825377","display_name":"Critical path method","level":2,"score":0.2851000130176544},{"id":"https://openalex.org/C2778915421","wikidata":"https://www.wikidata.org/wiki/Q3643177","display_name":"Performance improvement","level":2,"score":0.27250000834465027},{"id":"https://openalex.org/C113775141","wikidata":"https://www.wikidata.org/wiki/Q428691","display_name":"Computer engineering","level":1,"score":0.2639999985694885},{"id":"https://openalex.org/C31258907","wikidata":"https://www.wikidata.org/wiki/Q1301371","display_name":"Computer network","level":1,"score":0.2637999951839447},{"id":"https://openalex.org/C2777115002","wikidata":"https://www.wikidata.org/wiki/Q7168246","display_name":"Performance prediction","level":2,"score":0.2619999945163727},{"id":"https://openalex.org/C67186912","wikidata":"https://www.wikidata.org/wiki/Q367664","display_name":"Data modeling","level":2,"score":0.25529998540878296},{"id":"https://openalex.org/C103088060","wikidata":"https://www.wikidata.org/wiki/Q1062839","display_name":"Error detection and correction","level":2,"score":0.25110000371932983}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3731599.3767392","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3731599.3767392","pdf_url":null,"source":null,"license":"cc-by-sa","license_id":"https://openalex.org/licenses/cc-by-sa","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the SC '25 Workshops of the International Conference for High Performance Computing, Networking, Storage and Analysis","raw_type":"proceedings-article"}],"best_oa_location":{"id":"doi:10.1145/3731599.3767392","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3731599.3767392","pdf_url":null,"source":null,"license":"cc-by-sa","license_id":"https://openalex.org/licenses/cc-by-sa","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the SC '25 Workshops of the International Conference for High Performance Computing, Networking, Storage and Analysis","raw_type":"proceedings-article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":36,"referenced_works":["https://openalex.org/W134103711","https://openalex.org/W1519846186","https://openalex.org/W1637731592","https://openalex.org/W1962931680","https://openalex.org/W1965942711","https://openalex.org/W1976139981","https://openalex.org/W1984296775","https://openalex.org/W2042120213","https://openalex.org/W2055058176","https://openalex.org/W2059300917","https://openalex.org/W2102061396","https://openalex.org/W2127589680","https://openalex.org/W2341343020","https://openalex.org/W2908578142","https://openalex.org/W2964324519","https://openalex.org/W3000071864","https://openalex.org/W3094223918","https://openalex.org/W3159281830","https://openalex.org/W3204371345","https://openalex.org/W4205832898","https://openalex.org/W4210863326","https://openalex.org/W4212780575","https://openalex.org/W4316813731","https://openalex.org/W4323644233","https://openalex.org/W4365504784","https://openalex.org/W4385723308","https://openalex.org/W4386709562","https://openalex.org/W4386774053","https://openalex.org/W4392629219","https://openalex.org/W4393931333","https://openalex.org/W4394805246","https://openalex.org/W4395073435","https://openalex.org/W4402402782","https://openalex.org/W4402702025","https://openalex.org/W4404133383","https://openalex.org/W4405316497"],"related_works":[],"abstract_inverted_index":{"Optimizing":[0],"GPU-to-GPU":[1],"communication":[2,18,31,47,72],"is":[3],"a":[4,23,44],"key":[5],"challenge":[6],"for":[7,27,42,111],"improving":[8],"performance":[9,25],"in":[10,96,106],"MPI-based":[11],"HPC":[12],"applications,":[13],"especially":[14],"when":[15],"utilizing":[16],"multiple":[17,49],"paths.":[19,50],"This":[20],"paper":[21],"presents":[22],"novel":[24],"model":[26,64,87],"intra-node":[28],"multi-path":[29],"GPU":[30],"within":[32],"the":[33,39,63,99,108],"MPI+UCX":[34],"framework,":[35],"aimed":[36],"at":[37],"determining":[38],"optimal":[40,91,109],"configuration":[41,110],"distributing":[43],"single":[45],"P2P":[46],"across":[48],"By":[51],"considering":[52],"factors":[53],"such":[54],"as":[55],"link":[56],"bandwidth,":[57],"pipeline":[58],"overhead,":[59],"and":[60,74],"stream":[61],"synchronization,":[62],"identifies":[65],"an":[66],"efficient":[67],"path":[68],"distribution":[69],"strategy,":[70],"reducing":[71],"overhead":[73],"increasing":[75],"throughput.":[76],"Through":[77],"extensive":[78],"experiments":[79],"on":[80],"various":[81],"topologies,":[82],"we":[83],"demonstrate":[84],"that":[85],"our":[86],"accurately":[88],"finds":[89],"experimentally":[90],"configurations,":[92],"achieving":[93],"significant":[94],"improvements":[95],"performance,":[97],"with":[98],"average":[100],"of":[101],"less":[102],"than":[103],"6%":[104],"error":[105],"predicting":[107],"very":[112],"large":[113],"messages.":[114]},"counts_by_year":[{"year":2025,"cited_by_count":1}],"updated_date":"2026-03-27T05:58:40.876381","created_date":"2025-11-07T00:00:00"}
