{"id":"https://openalex.org/W4414898583","doi":"https://doi.org/10.1109/icdcs63083.2025.00094","title":"Shuffle-Exchange: Enhancing Collective Communication Efficiency for Large Model Training","display_name":"Shuffle-Exchange: Enhancing Collective Communication Efficiency for Large Model Training","publication_year":2025,"publication_date":"2025-07-21","ids":{"openalex":"https://openalex.org/W4414898583","doi":"https://doi.org/10.1109/icdcs63083.2025.00094"},"language":"en","primary_location":{"id":"doi:10.1109/icdcs63083.2025.00094","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icdcs63083.2025.00094","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 IEEE 45th International Conference on Distributed Computing Systems (ICDCS)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":null,"display_name":"Zhihang Tang","orcid":null},"institutions":[{"id":"https://openalex.org/I162868743","display_name":"Tianjin University","ror":"https://ror.org/012tb2g32","country_code":"CN","type":"education","lineage":["https://openalex.org/I162868743"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Zhihang Tang","raw_affiliation_strings":["Tianjin University,College of Intelligence and Computing"],"affiliations":[{"raw_affiliation_string":"Tianjin University,College of Intelligence and Computing","institution_ids":["https://openalex.org/I162868743"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100441352","display_name":"Chenxi Li","orcid":"https://orcid.org/0000-0002-9106-8048"},"institutions":[{"id":"https://openalex.org/I139759216","display_name":"Beijing University of Posts and Telecommunications","ror":"https://ror.org/04w9fbh59","country_code":"CN","type":"education","lineage":["https://openalex.org/I139759216"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Chenxi Li","raw_affiliation_strings":["Beijing University of Posts and Telecommunications,School of Computer Science"],"affiliations":[{"raw_affiliation_string":"Beijing University of Posts and Telecommunications,School of Computer Science","institution_ids":["https://openalex.org/I139759216"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Xiang Yang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Xiang Yang","raw_affiliation_strings":["Meituan Corporation"],"affiliations":[{"raw_affiliation_string":"Meituan Corporation","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100746631","display_name":"Bo He","orcid":"https://orcid.org/0000-0003-1301-4981"},"institutions":[{"id":"https://openalex.org/I139759216","display_name":"Beijing University of Posts and Telecommunications","ror":"https://ror.org/04w9fbh59","country_code":"CN","type":"education","lineage":["https://openalex.org/I139759216"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Bo He","raw_affiliation_strings":["Beijing University of Posts and Telecommunications,School of Computer Science"],"affiliations":[{"raw_affiliation_string":"Beijing University of Posts and Telecommunications,School of Computer Science","institution_ids":["https://openalex.org/I139759216"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100406584","display_name":"Qi Qi","orcid":"https://orcid.org/0000-0003-0829-4624"},"institutions":[{"id":"https://openalex.org/I139759216","display_name":"Beijing University of Posts and Telecommunications","ror":"https://ror.org/04w9fbh59","country_code":"CN","type":"education","lineage":["https://openalex.org/I139759216"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Qi Qi","raw_affiliation_strings":["Beijing University of Posts and Telecommunications,School of Computer Science"],"affiliations":[{"raw_affiliation_string":"Beijing University of Posts and Telecommunications,School of Computer Science","institution_ids":["https://openalex.org/I139759216"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100432460","display_name":"Jingyu Wang","orcid":"https://orcid.org/0000-0002-2182-2228"},"institutions":[{"id":"https://openalex.org/I139759216","display_name":"Beijing University of Posts and Telecommunications","ror":"https://ror.org/04w9fbh59","country_code":"CN","type":"education","lineage":["https://openalex.org/I139759216"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jingyu Wang","raw_affiliation_strings":["Beijing University of Posts and Telecommunications,School of Computer Science"],"affiliations":[{"raw_affiliation_string":"Beijing University of Posts and Telecommunications,School of Computer Science","institution_ids":["https://openalex.org/I139759216"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5050753595","display_name":"Laiping Zhao","orcid":"https://orcid.org/0000-0003-1967-2192"},"institutions":[{"id":"https://openalex.org/I162868743","display_name":"Tianjin University","ror":"https://ror.org/012tb2g32","country_code":"CN","type":"education","lineage":["https://openalex.org/I162868743"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Laiping Zhao","raw_affiliation_strings":["Tianjin University,College of Intelligence and Computing"],"affiliations":[{"raw_affiliation_string":"Tianjin University,College of Intelligence and Computing","institution_ids":["https://openalex.org/I162868743"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":7,"corresponding_author_ids":[],"corresponding_institution_ids":["https://openalex.org/I162868743"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.31426242,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"923","last_page":"933"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11195","display_name":"Simulation Techniques and Applications","score":0.3653999865055084,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},"topics":[{"id":"https://openalex.org/T11195","display_name":"Simulation Techniques and Applications","score":0.3653999865055084,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T11986","display_name":"Scientific Computing and Data Management","score":0.3368000090122223,"subfield":{"id":"https://openalex.org/subfields/1802","display_name":"Information Systems and Management"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/bottleneck","display_name":"Bottleneck","score":0.8816999793052673},{"id":"https://openalex.org/keywords/synchronizing","display_name":"Synchronizing","score":0.8163999915122986},{"id":"https://openalex.org/keywords/synchronization","display_name":"Synchronization (alternating current)","score":0.7573999762535095},{"id":"https://openalex.org/keywords/scalability","display_name":"Scalability","score":0.7455999851226807},{"id":"https://openalex.org/keywords/computation","display_name":"Computation","score":0.6205999851226807},{"id":"https://openalex.org/keywords/training","display_name":"Training (meteorology)","score":0.565500020980835},{"id":"https://openalex.org/keywords/convergence","display_name":"Convergence (economics)","score":0.5648999810218811}],"concepts":[{"id":"https://openalex.org/C2780513914","wikidata":"https://www.wikidata.org/wiki/Q18210350","display_name":"Bottleneck","level":2,"score":0.8816999793052673},{"id":"https://openalex.org/C162932704","wikidata":"https://www.wikidata.org/wiki/Q1058791","display_name":"Synchronizing","level":3,"score":0.8163999915122986},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7574999928474426},{"id":"https://openalex.org/C2778562939","wikidata":"https://www.wikidata.org/wiki/Q1298791","display_name":"Synchronization (alternating current)","level":3,"score":0.7573999762535095},{"id":"https://openalex.org/C48044578","wikidata":"https://www.wikidata.org/wiki/Q727490","display_name":"Scalability","level":2,"score":0.7455999851226807},{"id":"https://openalex.org/C120314980","wikidata":"https://www.wikidata.org/wiki/Q180634","display_name":"Distributed computing","level":1,"score":0.6796000003814697},{"id":"https://openalex.org/C45374587","wikidata":"https://www.wikidata.org/wiki/Q12525525","display_name":"Computation","level":2,"score":0.6205999851226807},{"id":"https://openalex.org/C2777211547","wikidata":"https://www.wikidata.org/wiki/Q17141490","display_name":"Training (meteorology)","level":2,"score":0.565500020980835},{"id":"https://openalex.org/C2777303404","wikidata":"https://www.wikidata.org/wiki/Q759757","display_name":"Convergence (economics)","level":2,"score":0.5648999810218811},{"id":"https://openalex.org/C158156997","wikidata":"https://www.wikidata.org/wiki/Q1416645","display_name":"Models of communication","level":2,"score":0.48730000853538513},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.382999986410141},{"id":"https://openalex.org/C108734733","wikidata":"https://www.wikidata.org/wiki/Q1172333","display_name":"Data synchronization","level":3,"score":0.3783000111579895},{"id":"https://openalex.org/C168031717","wikidata":"https://www.wikidata.org/wiki/Q1530280","display_name":"Balance (ability)","level":2,"score":0.3449000120162964},{"id":"https://openalex.org/C138959212","wikidata":"https://www.wikidata.org/wiki/Q1806783","display_name":"Load balancing (electrical power)","level":3,"score":0.31929999589920044},{"id":"https://openalex.org/C79403827","wikidata":"https://www.wikidata.org/wiki/Q3988","display_name":"Real-time computing","level":1,"score":0.30970001220703125},{"id":"https://openalex.org/C11644782","wikidata":"https://www.wikidata.org/wiki/Q15401790","display_name":"Cost efficiency","level":2,"score":0.2808000147342682},{"id":"https://openalex.org/C31258907","wikidata":"https://www.wikidata.org/wiki/Q1301371","display_name":"Computer network","level":1,"score":0.2574000060558319},{"id":"https://openalex.org/C123657996","wikidata":"https://www.wikidata.org/wiki/Q12271","display_name":"Architecture","level":2,"score":0.25600001215934753}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/icdcs63083.2025.00094","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icdcs63083.2025.00094","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 IEEE 45th International Conference on Distributed Computing Systems (ICDCS)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[{"id":"https://openalex.org/F4320311687","display_name":"Ministry of Education","ror":"https://ror.org/03m01yf64"},{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"},{"id":"https://openalex.org/F4320321543","display_name":"China Postdoctoral Science Foundation","ror":"https://ror.org/0426zh255"},{"id":"https://openalex.org/F4320335787","display_name":"Fundamental Research Funds for the Central Universities","ror":null}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":23,"referenced_works":["https://openalex.org/W1992208280","https://openalex.org/W2057332538","https://openalex.org/W2083842231","https://openalex.org/W2112796928","https://openalex.org/W2904556356","https://openalex.org/W2963433607","https://openalex.org/W3007279825","https://openalex.org/W3081168214","https://openalex.org/W3090287616","https://openalex.org/W3129329365","https://openalex.org/W3129831491","https://openalex.org/W3132107458","https://openalex.org/W3204998121","https://openalex.org/W4214711850","https://openalex.org/W4290003882","https://openalex.org/W4308426193","https://openalex.org/W4385768131","https://openalex.org/W4386245174","https://openalex.org/W4387544273","https://openalex.org/W4393406935","https://openalex.org/W4401753248","https://openalex.org/W4401753250","https://openalex.org/W4403211334"],"related_works":[],"abstract_inverted_index":{"Training":[0],"large":[1,94,145],"models":[2],"in":[3,12,76,130],"parallel":[4],"by":[5],"GPU":[6],"clusters":[7],"significantly":[8],"accelerates":[9],"the":[10,16,22,35,38,52,65,72,89,100,144],"computation":[11,55],"each":[13,114],"iteration.":[14,115],"However,":[15],"frequent":[17],"collective":[18],"communication":[19,53,66,90],"for":[20,92],"synchronizing":[21],"huge":[23],"number":[24,39],"of":[25,40],"gradients":[26],"poses":[27],"a":[28,45,82,121,134],"scalability":[29],"challenge,":[30],"whose":[31],"performance":[32],"gradually":[33],"becomes":[34],"bottleneck":[36],"as":[37],"workers":[41,132],"increases.":[42],"Ring-reduce":[43],"is":[44,85,125],"favorable":[46],"architecture":[47],"since":[48],"it":[49],"can":[50,142],"balance":[51],"and":[54],"load":[56],"among":[57],"workers.":[58],"In":[59],"this":[60],"paper,":[61],"we":[62],"discover":[63],"that":[64,140],"resources":[67],"are":[68],"underutilized":[69],"when":[70],"using":[71],"ring-reduce":[73,111],"synchronization":[74],"method":[75,84],"clusters.":[77],"Accordingly,":[78],"Shuffle-Exchange":[79],"Synchronization":[80],"(SES),":[81],"novel":[83],"proposed":[86],"to":[87,148],"improve":[88],"efficiency":[91],"distributed":[93],"model":[95,146,152],"training.":[96],"SES":[97,141],"organizes":[98],"all":[99],"worker":[101],"nodes":[102],"into":[103,127],"several":[104],"groups,":[105],"within":[106],"which":[107],"they":[108],"perform":[109],"small-scale":[110],"synchronizations":[112],"during":[113],"To":[116],"achieve":[117],"better":[118],"convergence":[119],"performance,":[120],"gradient":[122],"correction":[123],"operation":[124],"integrated":[126],"SES.":[128],"Experiments":[129],"16":[131],"on":[133],"real-world":[135],"industrial":[136],"computing":[137],"platform,":[138],"show":[139],"accelerate":[143],"training":[147],"1.97\u00d7":[149],"without":[150],"losing":[151],"performance.":[153]},"counts_by_year":[],"updated_date":"2026-04-21T08:09:41.155169","created_date":"2025-10-10T00:00:00"}
