{"id":"https://openalex.org/W7124980348","doi":"https://doi.org/10.1109/cloudcom67567.2025.11331477","title":"HERO: Hierarchical Efficient Reduction Optimization for Distributed Learning Systems","display_name":"HERO: Hierarchical Efficient Reduction Optimization for Distributed Learning Systems","publication_year":2025,"publication_date":"2025-11-14","ids":{"openalex":"https://openalex.org/W7124980348","doi":"https://doi.org/10.1109/cloudcom67567.2025.11331477"},"language":null,"primary_location":{"id":"doi:10.1109/cloudcom67567.2025.11331477","is_oa":false,"landing_page_url":"https://doi.org/10.1109/cloudcom67567.2025.11331477","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 lEEE International Conference on Cloud Computing Technology and Science (CloudCom)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5080803957","display_name":"Xudong Xiong","orcid":null},"institutions":[{"id":"https://openalex.org/I4388482657","display_name":"Shenzhen MSU-BIT University","ror":"https://ror.org/02q963474","country_code":null,"type":"education","lineage":["https://openalex.org/I4388482657"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Xudong Xiong","raw_affiliation_strings":["Shenzhen MSU-BIT University,Faculty of Engineering,Shenzhen,China"],"affiliations":[{"raw_affiliation_string":"Shenzhen MSU-BIT University,Faculty of Engineering,Shenzhen,China","institution_ids":["https://openalex.org/I4388482657"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5123386525","display_name":"Lihan Xu","orcid":null},"institutions":[{"id":"https://openalex.org/I4388482657","display_name":"Shenzhen MSU-BIT University","ror":"https://ror.org/02q963474","country_code":null,"type":"education","lineage":["https://openalex.org/I4388482657"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Lihan Xu","raw_affiliation_strings":["Shenzhen MSU-BIT University,Faculty of Engineering,Shenzhen,China"],"affiliations":[{"raw_affiliation_string":"Shenzhen MSU-BIT University,Faculty of Engineering,Shenzhen,China","institution_ids":["https://openalex.org/I4388482657"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5008114041","display_name":"Jie Chen","orcid":"https://orcid.org/0000-0002-9254-4413"},"institutions":[{"id":"https://openalex.org/I180726961","display_name":"Shenzhen University","ror":"https://ror.org/01vy4gh70","country_code":"CN","type":"education","lineage":["https://openalex.org/I180726961"]},{"id":"https://openalex.org/I4388482657","display_name":"Shenzhen MSU-BIT University","ror":"https://ror.org/02q963474","country_code":null,"type":"education","lineage":["https://openalex.org/I4388482657"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jiaxin Chen","raw_affiliation_strings":["Artificial Intelligence Research Institute, Shenzhen MSU-BIT University,Shenzhen,China"],"affiliations":[{"raw_affiliation_string":"Artificial Intelligence Research Institute, Shenzhen MSU-BIT University,Shenzhen,China","institution_ids":["https://openalex.org/I180726961","https://openalex.org/I4388482657"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5080803957"],"corresponding_institution_ids":["https://openalex.org/I4388482657"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.83293927,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"7"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12611","display_name":"Neural Networks and Reservoir Computing","score":0.1573999971151352,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T12611","display_name":"Neural Networks and Reservoir Computing","score":0.1573999971151352,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10502","display_name":"Advanced Memory and Neural Computing","score":0.12380000203847885,"subfield":{"id":"https://openalex.org/subfields/2208","display_name":"Electrical and Electronic Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12808","display_name":"Ferroelectric and Negative Capacitance Devices","score":0.12380000203847885,"subfield":{"id":"https://openalex.org/subfields/2208","display_name":"Electrical and Electronic Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/synchronizing","display_name":"Synchronizing","score":0.8310999870300293},{"id":"https://openalex.org/keywords/reduction","display_name":"Reduction (mathematics)","score":0.5396000146865845},{"id":"https://openalex.org/keywords/training","display_name":"Training (meteorology)","score":0.529699981212616},{"id":"https://openalex.org/keywords/artificial-neural-network","display_name":"Artificial neural network","score":0.49779999256134033},{"id":"https://openalex.org/keywords/distributed-learning","display_name":"Distributed learning","score":0.4706999957561493},{"id":"https://openalex.org/keywords/distributed-algorithm","display_name":"Distributed algorithm","score":0.33230000734329224}],"concepts":[{"id":"https://openalex.org/C162932704","wikidata":"https://www.wikidata.org/wiki/Q1058791","display_name":"Synchronizing","level":3,"score":0.8310999870300293},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7718999981880188},{"id":"https://openalex.org/C111335779","wikidata":"https://www.wikidata.org/wiki/Q3454686","display_name":"Reduction (mathematics)","level":2,"score":0.5396000146865845},{"id":"https://openalex.org/C2777211547","wikidata":"https://www.wikidata.org/wiki/Q17141490","display_name":"Training (meteorology)","level":2,"score":0.529699981212616},{"id":"https://openalex.org/C120314980","wikidata":"https://www.wikidata.org/wiki/Q180634","display_name":"Distributed computing","level":1,"score":0.507099986076355},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.49779999256134033},{"id":"https://openalex.org/C2779582901","wikidata":"https://www.wikidata.org/wiki/Q21013010","display_name":"Distributed learning","level":2,"score":0.4706999957561493},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.41100001335144043},{"id":"https://openalex.org/C79403827","wikidata":"https://www.wikidata.org/wiki/Q3988","display_name":"Real-time computing","level":1,"score":0.3343999981880188},{"id":"https://openalex.org/C130120984","wikidata":"https://www.wikidata.org/wiki/Q2835898","display_name":"Distributed algorithm","level":2,"score":0.33230000734329224},{"id":"https://openalex.org/C2778562939","wikidata":"https://www.wikidata.org/wiki/Q1298791","display_name":"Synchronization (alternating current)","level":3,"score":0.31839999556541443},{"id":"https://openalex.org/C123745756","wikidata":"https://www.wikidata.org/wiki/Q1665949","display_name":"Interconnection","level":2,"score":0.30970001220703125},{"id":"https://openalex.org/C164866538","wikidata":"https://www.wikidata.org/wiki/Q367351","display_name":"Cluster (spacecraft)","level":2,"score":0.2854999899864197},{"id":"https://openalex.org/C108583219","wikidata":"https://www.wikidata.org/wiki/Q197536","display_name":"Deep learning","level":2,"score":0.2833999991416931},{"id":"https://openalex.org/C70061542","wikidata":"https://www.wikidata.org/wiki/Q989016","display_name":"Distributed database","level":2,"score":0.26989999413490295},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.26420000195503235},{"id":"https://openalex.org/C51632099","wikidata":"https://www.wikidata.org/wiki/Q3985153","display_name":"Training set","level":2,"score":0.26109999418258667},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.25679999589920044}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/cloudcom67567.2025.11331477","is_oa":false,"landing_page_url":"https://doi.org/10.1109/cloudcom67567.2025.11331477","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 lEEE International Conference on Cloud Computing Technology and Science (CloudCom)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G8465124245","display_name":null,"funder_award_id":"62576213","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":10,"referenced_works":["https://openalex.org/W3034853385","https://openalex.org/W3047537431","https://openalex.org/W3177554119","https://openalex.org/W4283796083","https://openalex.org/W4321392172","https://openalex.org/W4380873143","https://openalex.org/W4387544258","https://openalex.org/W4393207711","https://openalex.org/W4408182971","https://openalex.org/W4409762896"],"related_works":[],"abstract_inverted_index":{"The":[0],"proliferation":[1],"of":[2,18],"large-scale":[3,44],"deep":[4],"neural":[5],"networks":[6],"has":[7],"made":[8],"data-parallel":[9],"distributed":[10],"training":[11,137,154,158],"an":[12,93],"indispensable":[13],"paradigm.":[14],"However,":[15],"the":[16,19,58,70,108,145],"performance":[17],"all-reduce":[20,36,65,72,95,147],"algorithm":[21,121],"used":[22],"for":[23,57,112],"synchronizing":[24,113],"gradient":[25,114],"information":[26,115],"is":[27],"often":[28],"severely":[29],"constrained":[30],"by":[31,92,99],"inter-process":[32],"communication":[33,126],"overhead.":[34],"Standard":[35],"algorithms":[37],"exhibit":[38],"low":[39],"efficiency":[40],"when":[41],"applied":[42],"to":[43,86,144],"computing":[45],"clusters.":[46],"To":[47],"mitigate":[48],"this":[49,120],"bottleneck,":[50],"we":[51],"designed":[52],"a":[53,62,123],"hierarchical":[54,104],"interconnect":[55],"structure":[56],"cluster":[59],"and":[60],"propose":[61],"topology-aware,":[63],"multi-level":[64],"algorithm.":[66],"This":[67,103],"approach":[68],"decomposes":[69],"global":[71],"operation":[73,96],"into":[74,83,135],"two":[75],"phases:":[76],"first,":[77],"all":[78],"participating":[79],"processes":[80],"are":[81],"divided":[82],"multiple":[84],"groups":[85],"perform":[87],"efficient":[88],"intra-group":[89],"reduction,":[90],"followed":[91],"inter-group":[94],"carried":[97],"out":[98],"designated":[100],"leader":[101],"processes.":[102,117],"strategy":[105],"significantly":[106],"reduces":[107],"waiting":[109],"time":[110,155],"required":[111],"across":[116],"We":[118],"implement":[119],"as":[122],"non-intrusive":[124],"custom":[125],"hook":[127],"within":[128],"PyTorch's":[129],"DistributedDataParallel":[130],"framework,":[131],"enabling":[132],"seamless":[133],"integration":[134],"existing":[136],"workflows.":[138],"Experimental":[139],"results":[140],"demonstrate":[141],"that,":[142],"compared":[143],"standard":[146],"baseline,":[148],"our":[149],"method":[150],"considerably":[151],"shortens":[152],"end-to-end":[153],"while":[156],"maintaining":[157],"accuracy,":[159],"particularly":[160],"in":[161],"communication-bound":[162],"scenarios.":[163]},"counts_by_year":[],"updated_date":"2026-04-09T08:11:56.329763","created_date":"2026-01-21T00:00:00"}
