{"id":"https://openalex.org/W4406163786","doi":"https://doi.org/10.1109/mm.2024.3524891","title":"OHIO: Enhancing RDMA Scalability in Alltoall With Optimized Communication Overlap","display_name":"OHIO: Enhancing RDMA Scalability in Alltoall With Optimized Communication Overlap","publication_year":2025,"publication_date":"2025-01-08","ids":{"openalex":"https://openalex.org/W4406163786","doi":"https://doi.org/10.1109/mm.2024.3524891"},"language":"en","primary_location":{"id":"doi:10.1109/mm.2024.3524891","is_oa":false,"landing_page_url":"https://doi.org/10.1109/mm.2024.3524891","pdf_url":null,"source":{"id":"https://openalex.org/S59697426","display_name":"IEEE Micro","issn_l":"0272-1732","issn":["0272-1732","1937-4143"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Micro","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5049257402","display_name":"Tu Tran","orcid":"https://orcid.org/0000-0003-0040-8404"},"institutions":[{"id":"https://openalex.org/I52357470","display_name":"The Ohio State University","ror":"https://ror.org/00rs6vg23","country_code":"US","type":"education","lineage":["https://openalex.org/I52357470"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Tu Tran","raw_affiliation_strings":["The Ohio State University, Columbus, OH, USA"],"affiliations":[{"raw_affiliation_string":"The Ohio State University, Columbus, OH, USA","institution_ids":["https://openalex.org/I52357470"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5071358621","display_name":"Goutham Kalikrishna Reddy Kuncham","orcid":"https://orcid.org/0000-0003-2112-4769"},"institutions":[{"id":"https://openalex.org/I52357470","display_name":"The Ohio State University","ror":"https://ror.org/00rs6vg23","country_code":"US","type":"education","lineage":["https://openalex.org/I52357470"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Goutham Kalikrishna Reddy Kuncham","raw_affiliation_strings":["The Ohio State University, Columbus, OH, USA"],"affiliations":[{"raw_affiliation_string":"The Ohio State University, Columbus, OH, USA","institution_ids":["https://openalex.org/I52357470"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5103171233","display_name":"Bharath Ramesh","orcid":"https://orcid.org/0000-0002-6430-8587"},"institutions":[{"id":"https://openalex.org/I52357470","display_name":"The Ohio State University","ror":"https://ror.org/00rs6vg23","country_code":"US","type":"education","lineage":["https://openalex.org/I52357470"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Bharath Ramesh","raw_affiliation_strings":["The Ohio State University, Columbus, OH, USA"],"affiliations":[{"raw_affiliation_string":"The Ohio State University, Columbus, OH, USA","institution_ids":["https://openalex.org/I52357470"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5090838961","display_name":"Shulei Xu","orcid":null},"institutions":[{"id":"https://openalex.org/I52357470","display_name":"The Ohio State University","ror":"https://ror.org/00rs6vg23","country_code":"US","type":"education","lineage":["https://openalex.org/I52357470"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Shulei Xu","raw_affiliation_strings":["The Ohio State University, Columbus, OH, USA"],"affiliations":[{"raw_affiliation_string":"The Ohio State University, Columbus, OH, USA","institution_ids":["https://openalex.org/I52357470"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5034293705","display_name":"Hari Subramoni","orcid":"https://orcid.org/0000-0002-1200-2754"},"institutions":[{"id":"https://openalex.org/I52357470","display_name":"The Ohio State University","ror":"https://ror.org/00rs6vg23","country_code":"US","type":"education","lineage":["https://openalex.org/I52357470"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Hari Subramoni","raw_affiliation_strings":["The Ohio State University, Columbus, OH, USA"],"affiliations":[{"raw_affiliation_string":"The Ohio State University, Columbus, OH, USA","institution_ids":["https://openalex.org/I52357470"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5024879682","display_name":"Dhabaleswar K. Panda","orcid":"https://orcid.org/0000-0002-0356-1781"},"institutions":[{"id":"https://openalex.org/I52357470","display_name":"The Ohio State University","ror":"https://ror.org/00rs6vg23","country_code":"US","type":"education","lineage":["https://openalex.org/I52357470"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Dhabaleswar K. DK Panda","raw_affiliation_strings":["The Ohio State University, Columbus, OH, USA"],"affiliations":[{"raw_affiliation_string":"The Ohio State University, Columbus, OH, USA","institution_ids":["https://openalex.org/I52357470"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5049257402"],"corresponding_institution_ids":["https://openalex.org/I52357470"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.00206654,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":"45","issue":"2","first_page":"36","last_page":"45"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10101","display_name":"Cloud Computing and Resource Management","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10715","display_name":"Distributed and Parallel Computing Systems","score":0.9987999796867371,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.910515308380127},{"id":"https://openalex.org/keywords/infiniband","display_name":"InfiniBand","score":0.9069363474845886},{"id":"https://openalex.org/keywords/remote-direct-memory-access","display_name":"Remote direct memory access","score":0.7769944667816162},{"id":"https://openalex.org/keywords/scalability","display_name":"Scalability","score":0.7357223033905029},{"id":"https://openalex.org/keywords/distributed-computing","display_name":"Distributed computing","score":0.6098194122314453},{"id":"https://openalex.org/keywords/message-passing","display_name":"Message passing","score":0.5101504921913147},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.4896112084388733},{"id":"https://openalex.org/keywords/exploit","display_name":"Exploit","score":0.47507399320602417},{"id":"https://openalex.org/keywords/programming-paradigm","display_name":"Programming paradigm","score":0.46519172191619873},{"id":"https://openalex.org/keywords/multi-core-processor","display_name":"Multi-core processor","score":0.4540002644062042},{"id":"https://openalex.org/keywords/benchmark","display_name":"Benchmark (surveying)","score":0.43384021520614624},{"id":"https://openalex.org/keywords/cache","display_name":"Cache","score":0.43206244707107544},{"id":"https://openalex.org/keywords/message-passing-interface","display_name":"Message Passing Interface","score":0.4107202887535095},{"id":"https://openalex.org/keywords/computer-network","display_name":"Computer network","score":0.36920446157455444},{"id":"https://openalex.org/keywords/operating-system","display_name":"Operating system","score":0.19774776697158813}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.910515308380127},{"id":"https://openalex.org/C2781030343","wikidata":"https://www.wikidata.org/wiki/Q922437","display_name":"InfiniBand","level":2,"score":0.9069363474845886},{"id":"https://openalex.org/C130795937","wikidata":"https://www.wikidata.org/wiki/Q2561570","display_name":"Remote direct memory access","level":2,"score":0.7769944667816162},{"id":"https://openalex.org/C48044578","wikidata":"https://www.wikidata.org/wiki/Q727490","display_name":"Scalability","level":2,"score":0.7357223033905029},{"id":"https://openalex.org/C120314980","wikidata":"https://www.wikidata.org/wiki/Q180634","display_name":"Distributed computing","level":1,"score":0.6098194122314453},{"id":"https://openalex.org/C854659","wikidata":"https://www.wikidata.org/wiki/Q1859284","display_name":"Message passing","level":2,"score":0.5101504921913147},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.4896112084388733},{"id":"https://openalex.org/C165696696","wikidata":"https://www.wikidata.org/wiki/Q11287","display_name":"Exploit","level":2,"score":0.47507399320602417},{"id":"https://openalex.org/C34165917","wikidata":"https://www.wikidata.org/wiki/Q188267","display_name":"Programming paradigm","level":2,"score":0.46519172191619873},{"id":"https://openalex.org/C78766204","wikidata":"https://www.wikidata.org/wiki/Q555032","display_name":"Multi-core processor","level":2,"score":0.4540002644062042},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.43384021520614624},{"id":"https://openalex.org/C115537543","wikidata":"https://www.wikidata.org/wiki/Q165596","display_name":"Cache","level":2,"score":0.43206244707107544},{"id":"https://openalex.org/C166782233","wikidata":"https://www.wikidata.org/wiki/Q127879","display_name":"Message Passing Interface","level":3,"score":0.4107202887535095},{"id":"https://openalex.org/C31258907","wikidata":"https://www.wikidata.org/wiki/Q1301371","display_name":"Computer network","level":1,"score":0.36920446157455444},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.19774776697158813},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0},{"id":"https://openalex.org/C205649164","wikidata":"https://www.wikidata.org/wiki/Q1071","display_name":"Geography","level":0,"score":0.0},{"id":"https://openalex.org/C13280743","wikidata":"https://www.wikidata.org/wiki/Q131089","display_name":"Geodesy","level":1,"score":0.0},{"id":"https://openalex.org/C38652104","wikidata":"https://www.wikidata.org/wiki/Q3510521","display_name":"Computer security","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/mm.2024.3524891","is_oa":false,"landing_page_url":"https://doi.org/10.1109/mm.2024.3524891","pdf_url":null,"source":{"id":"https://openalex.org/S59697426","display_name":"IEEE Micro","issn_l":"0272-1732","issn":["0272-1732","1937-4143"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Micro","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G1736414626","display_name":null,"funder_award_id":"2112606","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"},{"id":"https://openalex.org/G202197749","display_name":null,"funder_award_id":"1818253","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"},{"id":"https://openalex.org/G3358531649","display_name":null,"funder_award_id":"2311830","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"},{"id":"https://openalex.org/G4196433103","display_name":null,"funder_award_id":"2007991","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"},{"id":"https://openalex.org/G5271487896","display_name":null,"funder_award_id":"1854828","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"},{"id":"https://openalex.org/G8438779844","display_name":null,"funder_award_id":"2312927","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"},{"id":"https://openalex.org/G8585293727","display_name":null,"funder_award_id":"2018627","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"}],"funders":[{"id":"https://openalex.org/F4320306076","display_name":"National Science Foundation","ror":"https://ror.org/021nxhr62"}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":14,"referenced_works":["https://openalex.org/W2000531806","https://openalex.org/W2018346931","https://openalex.org/W2097493498","https://openalex.org/W2766672944","https://openalex.org/W3190316384","https://openalex.org/W3207354918","https://openalex.org/W4213027036","https://openalex.org/W4251390107","https://openalex.org/W4295791216","https://openalex.org/W4316252457","https://openalex.org/W4385723308","https://openalex.org/W4402402717","https://openalex.org/W6716975455","https://openalex.org/W7005924706"],"related_works":["https://openalex.org/W1978254186","https://openalex.org/W4245217724","https://openalex.org/W4390917331","https://openalex.org/W1984788566","https://openalex.org/W4253931064","https://openalex.org/W3166154920","https://openalex.org/W2154082760","https://openalex.org/W1964981582","https://openalex.org/W1970720081","https://openalex.org/W2751263050"],"abstract_inverted_index":{"The":[0,27,67],"presence":[1],"of":[2,73,115,118,127],"exascale":[3],"computers":[4],"has":[5],"pushed":[6],"a":[7,33,44,133],"new":[8],"boundary":[9],"in":[10,16,49,99],"computing":[11],"capability,":[12],"posing":[13],"performance":[14],"challenges":[15],"parallel":[17,37],"programming":[18],"models":[19],"on":[20,136],"how":[21],"to":[22,56,106,123],"exploit":[23],"such":[24],"systems":[25],"efficiently.":[26],"Message":[28],"Passing":[29],"Interface":[30],"(MPI)":[31],"is":[32,43,104],"dominant":[34],"model":[35],"for":[36],"programming.":[38],"Among":[39],"its":[40],"primitives,":[41],"MPI_Alltoall":[42],"communication-intensive":[45],"operation":[46],"widely":[47],"employed":[48],"numerous":[50],"applications,":[51],"yet":[52],"it":[53],"remains":[54],"challenging":[55],"optimize.":[57],"Alltoall":[58],"algorithms":[59],"are":[60],"mainly":[61],"classified":[62],"into":[63],"flat":[64],"and":[65,131,138,146],"hierarchical.":[66],"hierarchical":[68,93,119],"designs":[69,80,94],"avoid":[70],"the":[71,107,124],"slowdown":[72],"intra/inter-node":[74],"communication":[75],"by":[76,85],"decoupling":[77],"them.":[78],"Hierarchical":[79],"also":[81,95],"reduce":[82],"network":[83,112,125],"congestion":[84],"limiting":[86],"concurrently":[87],"injected":[88],"messages.":[89],"This":[90,102],"work":[91],"demonstrates":[92],"improve":[96],"connection":[97],"scalability":[98,126],"RDMA":[100],"networks.":[101],"improvement":[103],"attributed":[105],"cache":[108],"thrashing":[109],"happening":[110],"inside":[111],"adapters.":[113],"All":[114],"these":[116],"advantages":[117],"schemes":[120],"collectively":[121],"contribute":[122],"Alltoall.":[128],"We":[129],"propose":[130],"evaluate":[132],"network-agnostic":[134],"design":[135],"InfiniBand":[137],"Omni-Path":[139],"clusters,":[140],"showing":[141],"benefits":[142],"at":[143],"both":[144],"micro-benchmark":[145],"application":[147],"levels":[148],"over":[149],"other":[150],"MPI":[151],"libraries.":[152]},"counts_by_year":[],"updated_date":"2026-04-09T08:11:56.329763","created_date":"2025-10-10T00:00:00"}
