{"id":"https://openalex.org/W3177263144","doi":"https://doi.org/10.1145/3448016.3452773","title":"Heterogeneity-Aware Distributed Machine Learning Training via Partial Reduce","display_name":"Heterogeneity-Aware Distributed Machine Learning Training via Partial Reduce","publication_year":2021,"publication_date":"2021-06-09","ids":{"openalex":"https://openalex.org/W3177263144","doi":"https://doi.org/10.1145/3448016.3452773","mag":"3177263144"},"language":"en","primary_location":{"id":"doi:10.1145/3448016.3452773","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3448016.3452773","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2021 International Conference on Management of Data","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5015552951","display_name":"Xupeng Miao","orcid":"https://orcid.org/0000-0002-9371-8358"},"institutions":[{"id":"https://openalex.org/I2250653659","display_name":"Tencent (China)","ror":"https://ror.org/00hhjss72","country_code":"CN","type":"company","lineage":["https://openalex.org/I2250653659"]},{"id":"https://openalex.org/I20231570","display_name":"Peking University","ror":"https://ror.org/02v51f717","country_code":"CN","type":"education","lineage":["https://openalex.org/I20231570"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Xupeng Miao","raw_affiliation_strings":["Peking University &amp; Tencent Inc., Beijing, China"],"affiliations":[{"raw_affiliation_string":"Peking University &amp; Tencent Inc., Beijing, China","institution_ids":["https://openalex.org/I2250653659","https://openalex.org/I20231570"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5059601307","display_name":"Xiaonan Nie","orcid":"https://orcid.org/0000-0001-6766-757X"},"institutions":[{"id":"https://openalex.org/I20231570","display_name":"Peking University","ror":"https://ror.org/02v51f717","country_code":"CN","type":"education","lineage":["https://openalex.org/I20231570"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xiaonan Nie","raw_affiliation_strings":["Peking University, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Peking University, Beijing, China","institution_ids":["https://openalex.org/I20231570"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5014615052","display_name":"Yingxia Shao","orcid":"https://orcid.org/0000-0002-8559-2628"},"institutions":[{"id":"https://openalex.org/I139759216","display_name":"Beijing University of Posts and Telecommunications","ror":"https://ror.org/04w9fbh59","country_code":"CN","type":"education","lineage":["https://openalex.org/I139759216"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yingxia Shao","raw_affiliation_strings":["Beijing University of Posts and Telecommunications, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Beijing University of Posts and Telecommunications, Beijing, China","institution_ids":["https://openalex.org/I139759216"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5102862758","display_name":"Zhi Yang","orcid":"https://orcid.org/0000-0002-8219-4499"},"institutions":[{"id":"https://openalex.org/I20231570","display_name":"Peking University","ror":"https://ror.org/02v51f717","country_code":"CN","type":"education","lineage":["https://openalex.org/I20231570"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Zhi Yang","raw_affiliation_strings":["Peking University, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Peking University, Beijing, China","institution_ids":["https://openalex.org/I20231570"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5102918834","display_name":"Jiawei Jiang","orcid":"https://orcid.org/0000-0003-0051-0046"},"institutions":[{"id":"https://openalex.org/I35440088","display_name":"ETH Zurich","ror":"https://ror.org/05a28rw58","country_code":"CH","type":"education","lineage":["https://openalex.org/I2799323385","https://openalex.org/I35440088"]}],"countries":["CH"],"is_corresponding":false,"raw_author_name":"Jiawei Jiang","raw_affiliation_strings":["ETH Z\u00fcrich, Zurich, Switzerland"],"affiliations":[{"raw_affiliation_string":"ETH Z\u00fcrich, Zurich, Switzerland","institution_ids":["https://openalex.org/I35440088"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5102874660","display_name":"Lingxiao Ma","orcid":"https://orcid.org/0009-0009-9524-5476"},"institutions":[{"id":"https://openalex.org/I20231570","display_name":"Peking University","ror":"https://ror.org/02v51f717","country_code":"CN","type":"education","lineage":["https://openalex.org/I20231570"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Lingxiao Ma","raw_affiliation_strings":["Peking University, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Peking University, Beijing, China","institution_ids":["https://openalex.org/I20231570"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5062357883","display_name":"Bin Cui","orcid":"https://orcid.org/0000-0003-1681-4677"},"institutions":[{"id":"https://openalex.org/I20231570","display_name":"Peking University","ror":"https://ror.org/02v51f717","country_code":"CN","type":"education","lineage":["https://openalex.org/I20231570"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Bin Cui","raw_affiliation_strings":["Peking University, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Peking University, Beijing, China","institution_ids":["https://openalex.org/I20231570"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":7,"corresponding_author_ids":["https://openalex.org/A5015552951"],"corresponding_institution_ids":["https://openalex.org/I20231570","https://openalex.org/I2250653659"],"apc_list":null,"apc_paid":null,"fwci":5.4386,"has_fulltext":false,"cited_by_count":59,"citation_normalized_percentile":{"value":0.96446362,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":89,"max":100},"biblio":{"volume":null,"issue":null,"first_page":"2262","last_page":"2270"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11612","display_name":"Stochastic Gradient Optimization Techniques","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11612","display_name":"Stochastic Gradient Optimization Techniques","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.9965999722480774,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10764","display_name":"Privacy-Preserving Technologies in Data","score":0.994700014591217,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8682036399841309},{"id":"https://openalex.org/keywords/asynchronous-communication","display_name":"Asynchronous communication","score":0.7832431793212891},{"id":"https://openalex.org/keywords/distributed-computing","display_name":"Distributed computing","score":0.6476308703422546},{"id":"https://openalex.org/keywords/cloud-computing","display_name":"Cloud computing","score":0.5051615834236145},{"id":"https://openalex.org/keywords/convergence","display_name":"Convergence (economics)","score":0.49192214012145996},{"id":"https://openalex.org/keywords/key","display_name":"Key (lock)","score":0.47753798961639404},{"id":"https://openalex.org/keywords/homogeneous","display_name":"Homogeneous","score":0.459951788187027},{"id":"https://openalex.org/keywords/distributed-algorithm","display_name":"Distributed algorithm","score":0.41155701875686646},{"id":"https://openalex.org/keywords/computer-network","display_name":"Computer network","score":0.19491422176361084},{"id":"https://openalex.org/keywords/operating-system","display_name":"Operating system","score":0.1370607316493988}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8682036399841309},{"id":"https://openalex.org/C151319957","wikidata":"https://www.wikidata.org/wiki/Q752739","display_name":"Asynchronous communication","level":2,"score":0.7832431793212891},{"id":"https://openalex.org/C120314980","wikidata":"https://www.wikidata.org/wiki/Q180634","display_name":"Distributed computing","level":1,"score":0.6476308703422546},{"id":"https://openalex.org/C79974875","wikidata":"https://www.wikidata.org/wiki/Q483639","display_name":"Cloud computing","level":2,"score":0.5051615834236145},{"id":"https://openalex.org/C2777303404","wikidata":"https://www.wikidata.org/wiki/Q759757","display_name":"Convergence (economics)","level":2,"score":0.49192214012145996},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.47753798961639404},{"id":"https://openalex.org/C66882249","wikidata":"https://www.wikidata.org/wiki/Q169336","display_name":"Homogeneous","level":2,"score":0.459951788187027},{"id":"https://openalex.org/C130120984","wikidata":"https://www.wikidata.org/wiki/Q2835898","display_name":"Distributed algorithm","level":2,"score":0.41155701875686646},{"id":"https://openalex.org/C31258907","wikidata":"https://www.wikidata.org/wiki/Q1301371","display_name":"Computer network","level":1,"score":0.19491422176361084},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.1370607316493988},{"id":"https://openalex.org/C50522688","wikidata":"https://www.wikidata.org/wiki/Q189833","display_name":"Economic growth","level":1,"score":0.0},{"id":"https://openalex.org/C97355855","wikidata":"https://www.wikidata.org/wiki/Q11473","display_name":"Thermodynamics","level":1,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3448016.3452773","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3448016.3452773","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2021 International Conference on Management of Data","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"display_name":"Industry, innovation and infrastructure","score":0.4300000071525574,"id":"https://metadata.un.org/sdg/9"}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":30,"referenced_works":["https://openalex.org/W2029463628","https://openalex.org/W2057332538","https://openalex.org/W2108598243","https://openalex.org/W2166706236","https://openalex.org/W2194775991","https://openalex.org/W2336650964","https://openalex.org/W2612026221","https://openalex.org/W2741959974","https://openalex.org/W2744604411","https://openalex.org/W2765170276","https://openalex.org/W2789727279","https://openalex.org/W2799200478","https://openalex.org/W2948349252","https://openalex.org/W2951471551","https://openalex.org/W2953070460","https://openalex.org/W2962835968","https://openalex.org/W2962863496","https://openalex.org/W2963228337","https://openalex.org/W2963433607","https://openalex.org/W2963903325","https://openalex.org/W2966194222","https://openalex.org/W2967558351","https://openalex.org/W3030051638","https://openalex.org/W3035374030","https://openalex.org/W3090347762","https://openalex.org/W3101434632","https://openalex.org/W3136172274","https://openalex.org/W3175556629","https://openalex.org/W3176923454","https://openalex.org/W6637939107"],"related_works":["https://openalex.org/W2116677773","https://openalex.org/W4244478748","https://openalex.org/W4223488648","https://openalex.org/W2155261584","https://openalex.org/W2134969820","https://openalex.org/W2251605416","https://openalex.org/W4389340727","https://openalex.org/W3150465815","https://openalex.org/W2766289720","https://openalex.org/W1528719522"],"abstract_inverted_index":{"All-reduce":[0,21],"is":[1,22,147],"the":[2,13,17,37,65,86,95,98,131],"key":[3],"communication":[4,27],"primitive":[5,68],"used":[6],"in":[7,16,122,130],"distributed":[8,91,107],"data-parallel":[9],"training":[10],"due":[11],"to":[12,24,81,117],"high":[14,58],"performance":[15,62,138],"homogeneous":[18],"environment.":[19],"However,":[20],"sensitive":[23],"stragglers":[25],"and":[26,47,61,110,135],"delays":[28],"as":[29,90],"deep":[30],"learning":[31],"has":[32],"been":[33],"increasingly":[34],"deployed":[35],"on":[36],"heterogeneous":[38,123],"environment":[39],"like":[40],"cloud.":[41],"In":[42],"this":[43],"paper,":[44],"we":[45,101],"propose":[46,103],"analyze":[48],"a":[49,82,104,112,127],"novel":[50,113],"variant":[51],"of":[52,97],"all-reduce,":[53],"called":[54],"partial-reduce,":[55],"which":[56],"provides":[57],"heterogeneity":[59],"tolerance":[60],"by":[63],"decomposing":[64],"synchronous":[66],"all-reduce":[67],"into":[69],"parallel-asynchronous":[70],"partial-reduce":[71,79,99],"operations.":[72],"We":[73,125],"provide":[74],"theoretical":[75],"guarantees,":[76],"proving":[77],"that":[78,145],"converges":[80],"stationary":[83],"point":[84],"at":[85],"similar":[87],"sub-linear":[88],"rate":[89],"SGD.":[92],"To":[93],"enforce":[94],"convergence":[96],"primitive,":[100],"further":[102],"dynamic":[105],"staleness-aware":[106],"averaging":[108],"algorithm":[109],"implement":[111],"group":[114],"generation":[115],"mechanism":[116],"prevent":[118],"possible":[119],"update":[120],"isolation":[121],"environments.":[124],"build":[126],"prototype":[128],"system":[129],"real":[132],"production":[133],"cluster":[134],"validate":[136],"its":[137],"under":[139],"different":[140],"workloads.":[141],"The":[142],"experiments":[143],"show":[144],"it":[146],"1.21x-2x":[148],"faster":[149],"than":[150],"other":[151],"state-of-the-art":[152],"baselines.":[153]},"counts_by_year":[{"year":2025,"cited_by_count":19},{"year":2024,"cited_by_count":14},{"year":2023,"cited_by_count":11},{"year":2022,"cited_by_count":14},{"year":2021,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
