{"id":"https://openalex.org/W4416004490","doi":"https://doi.org/10.1145/3731599.3767462","title":"MPI Communication Performance on AMD MI300A: Microbenchmarks and Applications","display_name":"MPI Communication Performance on AMD MI300A: Microbenchmarks and Applications","publication_year":2025,"publication_date":"2025-11-07","ids":{"openalex":"https://openalex.org/W4416004490","doi":"https://doi.org/10.1145/3731599.3767462"},"language":null,"primary_location":{"id":"doi:10.1145/3731599.3767462","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3731599.3767462","pdf_url":null,"source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the SC '25 Workshops of the International Conference for High Performance Computing, Networking, Storage and Analysis","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://doi.org/10.1145/3731599.3767462","any_repository_has_fulltext":null},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5071358621","display_name":"Goutham Kalikrishna Reddy Kuncham","orcid":"https://orcid.org/0000-0003-2112-4769"},"institutions":[{"id":"https://openalex.org/I52357470","display_name":"The Ohio State University","ror":"https://ror.org/00rs6vg23","country_code":"US","type":"education","lineage":["https://openalex.org/I52357470"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Goutham Kalikrishna Reddy Kuncham","raw_affiliation_strings":["Department of Computer Science and Engineering, Ohio State University, Columbus, Ohio, USA"],"affiliations":[{"raw_affiliation_string":"Department of Computer Science and Engineering, Ohio State University, Columbus, Ohio, USA","institution_ids":["https://openalex.org/I52357470"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5029253472","display_name":"Siyuan Zhang","orcid":"https://orcid.org/0000-0001-6888-7793"},"institutions":[{"id":"https://openalex.org/I52357470","display_name":"The Ohio State University","ror":"https://ror.org/00rs6vg23","country_code":"US","type":"education","lineage":["https://openalex.org/I52357470"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Siyuan Zhang","raw_affiliation_strings":["Department of Computer Science and Engineering, Ohio State University, Columbus, Ohio, USA"],"affiliations":[{"raw_affiliation_string":"Department of Computer Science and Engineering, Ohio State University, Columbus, Ohio, USA","institution_ids":["https://openalex.org/I52357470"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5120297696","display_name":"Shoaib Mohammad","orcid":"https://orcid.org/0009-0000-9723-4396"},"institutions":[{"id":"https://openalex.org/I52357470","display_name":"The Ohio State University","ror":"https://ror.org/00rs6vg23","country_code":"US","type":"education","lineage":["https://openalex.org/I52357470"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Shoaib Mohammad","raw_affiliation_strings":["Department of Computer Science and Engineering, Ohio State University, Columbus, Ohio, USA"],"affiliations":[{"raw_affiliation_string":"Department of Computer Science and Engineering, Ohio State University, Columbus, Ohio, USA","institution_ids":["https://openalex.org/I52357470"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5013189498","display_name":"Chen-Chun Chen","orcid":"https://orcid.org/0000-0002-7471-7552"},"institutions":[{"id":"https://openalex.org/I52357470","display_name":"The Ohio State University","ror":"https://ror.org/00rs6vg23","country_code":"US","type":"education","lineage":["https://openalex.org/I52357470"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Chen-Chun Chen","raw_affiliation_strings":["Department of Computer Science and Engineering, Ohio State University, Columbus, Ohio, USA"],"affiliations":[{"raw_affiliation_string":"Department of Computer Science and Engineering, Ohio State University, Columbus, Ohio, USA","institution_ids":["https://openalex.org/I52357470"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5024879682","display_name":"Dhabaleswar K. Panda","orcid":"https://orcid.org/0000-0002-0356-1781"},"institutions":[{"id":"https://openalex.org/I52357470","display_name":"The Ohio State University","ror":"https://ror.org/00rs6vg23","country_code":"US","type":"education","lineage":["https://openalex.org/I52357470"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Dhabaleswar K. Panda","raw_affiliation_strings":["Department of Computer Science and Engineering, Ohio State University, Columbus, Ohio, USA"],"affiliations":[{"raw_affiliation_string":"Department of Computer Science and Engineering, Ohio State University, Columbus, Ohio, USA","institution_ids":["https://openalex.org/I52357470"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5071358621"],"corresponding_institution_ids":["https://openalex.org/I52357470"],"apc_list":null,"apc_paid":null,"fwci":2.1983,"has_fulltext":false,"cited_by_count":1,"citation_normalized_percentile":{"value":0.89749948,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":91,"max":95},"biblio":{"volume":null,"issue":null,"first_page":"977","last_page":"984"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.9426000118255615,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.9426000118255615,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10904","display_name":"Embedded Systems Design Techniques","score":0.014100000262260437,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10829","display_name":"Interconnection Networks and Systems","score":0.01119999960064888,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/scaling","display_name":"Scaling","score":0.5112000107765198},{"id":"https://openalex.org/keywords/message-passing","display_name":"Message passing","score":0.4648999869823456},{"id":"https://openalex.org/keywords/bandwidth","display_name":"Bandwidth (computing)","score":0.4514999985694885},{"id":"https://openalex.org/keywords/supercomputer","display_name":"Supercomputer","score":0.42669999599456787},{"id":"https://openalex.org/keywords/message-passing-interface","display_name":"Message Passing Interface","score":0.4027000069618225},{"id":"https://openalex.org/keywords/measure","display_name":"Measure (data warehouse)","score":0.3977000117301941},{"id":"https://openalex.org/keywords/multi-core-processor","display_name":"Multi-core processor","score":0.36890000104904175}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8525999784469604},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.525600016117096},{"id":"https://openalex.org/C99844830","wikidata":"https://www.wikidata.org/wiki/Q102441924","display_name":"Scaling","level":2,"score":0.5112000107765198},{"id":"https://openalex.org/C854659","wikidata":"https://www.wikidata.org/wiki/Q1859284","display_name":"Message passing","level":2,"score":0.4648999869823456},{"id":"https://openalex.org/C2776257435","wikidata":"https://www.wikidata.org/wiki/Q1576430","display_name":"Bandwidth (computing)","level":2,"score":0.4514999985694885},{"id":"https://openalex.org/C83283714","wikidata":"https://www.wikidata.org/wiki/Q121117","display_name":"Supercomputer","level":2,"score":0.42669999599456787},{"id":"https://openalex.org/C166782233","wikidata":"https://www.wikidata.org/wiki/Q127879","display_name":"Message Passing Interface","level":3,"score":0.4027000069618225},{"id":"https://openalex.org/C2780009758","wikidata":"https://www.wikidata.org/wiki/Q6804172","display_name":"Measure (data warehouse)","level":2,"score":0.3977000117301941},{"id":"https://openalex.org/C118524514","wikidata":"https://www.wikidata.org/wiki/Q173212","display_name":"Computer architecture","level":1,"score":0.38760000467300415},{"id":"https://openalex.org/C78766204","wikidata":"https://www.wikidata.org/wiki/Q555032","display_name":"Multi-core processor","level":2,"score":0.36890000104904175},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.36010000109672546},{"id":"https://openalex.org/C2989134064","wikidata":"https://www.wikidata.org/wiki/Q288510","display_name":"Execution time","level":2,"score":0.29589998722076416},{"id":"https://openalex.org/C120314980","wikidata":"https://www.wikidata.org/wiki/Q180634","display_name":"Distributed computing","level":1,"score":0.2904999852180481},{"id":"https://openalex.org/C3020431745","wikidata":"https://www.wikidata.org/wiki/Q25325220","display_name":"Many core","level":2,"score":0.2896000146865845},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.28870001435279846},{"id":"https://openalex.org/C49154492","wikidata":"https://www.wikidata.org/wiki/Q5300","display_name":"Central processing unit","level":2,"score":0.28130000829696655},{"id":"https://openalex.org/C2780870223","wikidata":"https://www.wikidata.org/wiki/Q1004415","display_name":"Runtime system","level":2,"score":0.26820001006126404},{"id":"https://openalex.org/C3018397939","wikidata":"https://www.wikidata.org/wiki/Q3644502","display_name":"Open source","level":3,"score":0.26510000228881836}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3731599.3767462","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3731599.3767462","pdf_url":null,"source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the SC '25 Workshops of the International Conference for High Performance Computing, Networking, Storage and Analysis","raw_type":"proceedings-article"}],"best_oa_location":{"id":"doi:10.1145/3731599.3767462","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3731599.3767462","pdf_url":null,"source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the SC '25 Workshops of the International Conference for High Performance Computing, Networking, Storage and Analysis","raw_type":"proceedings-article"},"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G4906051714","display_name":null,"funder_award_id":"B668423","funder_id":"https://openalex.org/F4320338286","funder_display_name":"Lawrence Livermore National Laboratory"},{"id":"https://openalex.org/G785097568","display_name":null,"funder_award_id":"NCR-130002","funder_id":"https://openalex.org/F4320307938","funder_display_name":"NCR"},{"id":"https://openalex.org/G965775444","display_name":null,"funder_award_id":"2311830, 2312927, 2323116, 2415201, 2504944","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"}],"funders":[{"id":"https://openalex.org/F4320306076","display_name":"National Science Foundation","ror":"https://ror.org/021nxhr62"},{"id":"https://openalex.org/F4320307938","display_name":"NCR","ror":"https://ror.org/00nqjkj48"},{"id":"https://openalex.org/F4320338286","display_name":"Lawrence Livermore National Laboratory","ror":"https://ror.org/041nk4h53"}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":5,"referenced_works":["https://openalex.org/W1825216778","https://openalex.org/W2992165038","https://openalex.org/W4406157308","https://openalex.org/W4406157579","https://openalex.org/W4410510709"],"related_works":[],"abstract_inverted_index":{"AMD\u2019s":[0],"MI300A":[1,55],"integrates":[2],"CPU":[3,63],"and":[4,17,51,64,69,74,87,90,104,120],"GPU":[5,65],"chiplets":[6],"around":[7],"a":[8,43,108],"shared":[9],"HBM3":[10],"pool,":[11],"removing":[12],"the":[13],"traditional":[14],"host-device":[15],"boundary":[16],"changing":[18],"assumptions":[19],"in":[20,126],"GPU-aware":[21],"MPI.":[22,127],"Despite":[23],"early":[24],"deployments,":[25],"there":[26],"is":[27],"little":[28],"guidance":[29,119],"on":[30,36,54,62],"how":[31],"mainstream":[32],"MPI":[33],"libraries":[34],"behave":[35],"this":[37],"architecture.":[38],"This":[39],"evaluation":[40],"paper":[41],"presents":[42],"comparative":[44],"study":[45,116],"of":[46,107],"MVAPICH-Plus,":[47],"Open":[48],"MPI,":[49],"MPICH,":[50],"Cray":[52],"MPICH":[53],"APU":[56],"nodes.":[57],"We":[58,81],"measure":[59],"point-to-point":[60],"performance":[61],"buffers,":[66],"reporting":[67],"intra-node":[68],"inter-node":[70],"latency,":[71],"unidirectional":[72],"bandwidth,":[73],"bidirectional":[75],"bandwidth":[76],"across":[77],"various":[78],"message":[79],"sizes.":[80],"then":[82],"examine":[83],"collectives,":[84],"covering":[85],"reduction-based":[86],"data-movement-based":[88],"operations,":[89],"analyze":[91],"scaling":[92],"behavior.":[93],"Finally,":[94],"we":[95],"connect":[96],"microbenchmark":[97],"trends":[98],"to":[99],"application":[100],"results":[101],"using":[102],"OpenFOAM":[103],"Distributed":[105],"training":[106],"large":[109],"language":[110],"model":[111],"(LLM)":[112],"with":[113],"PyTorch.":[114],"The":[115],"distills":[117],"practical":[118],"highlights":[121],"opportunities":[122],"for":[123],"MI300A-aware":[124],"optimizations":[125]},"counts_by_year":[{"year":2025,"cited_by_count":1}],"updated_date":"2026-04-09T08:11:56.329763","created_date":"2025-11-07T00:00:00"}
