{"id":"https://openalex.org/W2562749854","doi":"https://doi.org/10.1145/3126686.3126749","title":"Efficient Communications in Training Large Scale Neural Networks","display_name":"Efficient Communications in Training Large Scale Neural Networks","publication_year":2017,"publication_date":"2017-10-23","ids":{"openalex":"https://openalex.org/W2562749854","doi":"https://doi.org/10.1145/3126686.3126749","mag":"2562749854"},"language":"en","primary_location":{"id":"doi:10.1145/3126686.3126749","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3126686.3126749","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the on Thematic Workshops of ACM Multimedia 2017","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5084877885","display_name":"Yiyang Zhao","orcid":"https://orcid.org/0000-0003-4935-0688"},"institutions":[{"id":"https://openalex.org/I150229711","display_name":"University of Electronic Science and Technology of China","ror":"https://ror.org/04qr3zq92","country_code":"CN","type":"education","lineage":["https://openalex.org/I150229711"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Yiyang Zhao","raw_affiliation_strings":["University of Electronic Science and Technology of China, Chengdu, China"],"affiliations":[{"raw_affiliation_string":"University of Electronic Science and Technology of China, Chengdu, China","institution_ids":["https://openalex.org/I150229711"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5010181097","display_name":"Linnan Wang","orcid":"https://orcid.org/0000-0001-6114-7098"},"institutions":[{"id":"https://openalex.org/I27804330","display_name":"Brown University","ror":"https://ror.org/05gq02987","country_code":"US","type":"education","lineage":["https://openalex.org/I27804330"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Linnan Wang","raw_affiliation_strings":["Brown University, Rhode Island, USA"],"affiliations":[{"raw_affiliation_string":"Brown University, Rhode Island, USA","institution_ids":["https://openalex.org/I27804330"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5027254893","display_name":"Wei Wu","orcid":"https://orcid.org/0000-0002-2750-6365"},"institutions":[{"id":"https://openalex.org/I75027704","display_name":"University of Tennessee at Knoxville","ror":"https://ror.org/020f3ap87","country_code":"US","type":"education","lineage":["https://openalex.org/I75027704"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Wei Wu","raw_affiliation_strings":["University of Tennessee, Knoxville, TN, USA"],"affiliations":[{"raw_affiliation_string":"University of Tennessee, Knoxville, TN, USA","institution_ids":["https://openalex.org/I75027704"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5010055736","display_name":"George Bosilca","orcid":"https://orcid.org/0000-0003-2411-8495"},"institutions":[{"id":"https://openalex.org/I75027704","display_name":"University of Tennessee at Knoxville","ror":"https://ror.org/020f3ap87","country_code":"US","type":"education","lineage":["https://openalex.org/I75027704"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"George Bosilca","raw_affiliation_strings":["University of Tennessee, Knoxville, TN, USA"],"affiliations":[{"raw_affiliation_string":"University of Tennessee, Knoxville, TN, USA","institution_ids":["https://openalex.org/I75027704"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5016738770","display_name":"Richard Vuduc","orcid":"https://orcid.org/0000-0003-2178-138X"},"institutions":[{"id":"https://openalex.org/I130701444","display_name":"Georgia Institute of Technology","ror":"https://ror.org/01zkghx44","country_code":"US","type":"education","lineage":["https://openalex.org/I130701444"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Richard Vuduc","raw_affiliation_strings":["Georgia Institute of Technology, Atlanta, GA, USA"],"affiliations":[{"raw_affiliation_string":"Georgia Institute of Technology, Atlanta, GA, USA","institution_ids":["https://openalex.org/I130701444"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5062753948","display_name":"Jinmian Ye","orcid":null},"institutions":[{"id":"https://openalex.org/I150229711","display_name":"University of Electronic Science and Technology of China","ror":"https://ror.org/04qr3zq92","country_code":"CN","type":"education","lineage":["https://openalex.org/I150229711"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jinmian Ye","raw_affiliation_strings":["University of Electronic Science and Technology of China, Chengdu, China"],"affiliations":[{"raw_affiliation_string":"University of Electronic Science and Technology of China, Chengdu, China","institution_ids":["https://openalex.org/I150229711"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101954684","display_name":"Wenqi Tang","orcid":"https://orcid.org/0000-0002-0379-7081"},"institutions":[{"id":"https://openalex.org/I150229711","display_name":"University of Electronic Science and Technology of China","ror":"https://ror.org/04qr3zq92","country_code":"CN","type":"education","lineage":["https://openalex.org/I150229711"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Wenqi Tang","raw_affiliation_strings":["University of Electronic Science and Technology of China, Chengdu, China"],"affiliations":[{"raw_affiliation_string":"University of Electronic Science and Technology of China, Chengdu, China","institution_ids":["https://openalex.org/I150229711"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5051227924","display_name":"Zenglin Xu","orcid":"https://orcid.org/0000-0001-5550-6461"},"institutions":[{"id":"https://openalex.org/I150229711","display_name":"University of Electronic Science and Technology of China","ror":"https://ror.org/04qr3zq92","country_code":"CN","type":"education","lineage":["https://openalex.org/I150229711"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Zenglin Xu","raw_affiliation_strings":["University of Electronic Science and Technology of China, Chengdu, China"],"affiliations":[{"raw_affiliation_string":"University of Electronic Science and Technology of China, Chengdu, China","institution_ids":["https://openalex.org/I150229711"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":8,"corresponding_author_ids":["https://openalex.org/A5084877885"],"corresponding_institution_ids":["https://openalex.org/I150229711"],"apc_list":null,"apc_paid":null,"fwci":1.9502,"has_fulltext":false,"cited_by_count":15,"citation_normalized_percentile":{"value":0.89098041,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":98},"biblio":{"volume":null,"issue":null,"first_page":"110","last_page":"116"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11612","display_name":"Stochastic Gradient Optimization Techniques","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11612","display_name":"Stochastic Gradient Optimization Techniques","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.9993000030517578,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10764","display_name":"Privacy-Preserving Technologies in Data","score":0.9979000091552734,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.854424774646759},{"id":"https://openalex.org/keywords/scalability","display_name":"Scalability","score":0.8281538486480713},{"id":"https://openalex.org/keywords/stochastic-gradient-descent","display_name":"Stochastic gradient descent","score":0.7307239174842834},{"id":"https://openalex.org/keywords/implementation","display_name":"Implementation","score":0.6787097454071045},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.5812759399414062},{"id":"https://openalex.org/keywords/bandwidth","display_name":"Bandwidth (computing)","score":0.5424573421478271},{"id":"https://openalex.org/keywords/distributed-computing","display_name":"Distributed computing","score":0.48434388637542725},{"id":"https://openalex.org/keywords/convergence","display_name":"Convergence (economics)","score":0.4837039113044739},{"id":"https://openalex.org/keywords/message-passing","display_name":"Message passing","score":0.47451895475387573},{"id":"https://openalex.org/keywords/artificial-neural-network","display_name":"Artificial neural network","score":0.47219714522361755},{"id":"https://openalex.org/keywords/bulk-synchronous-parallel","display_name":"Bulk synchronous parallel","score":0.4472646117210388},{"id":"https://openalex.org/keywords/parallel-algorithm","display_name":"Parallel algorithm","score":0.32651257514953613},{"id":"https://openalex.org/keywords/computer-network","display_name":"Computer network","score":0.23645302653312683},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.21884670853614807}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.854424774646759},{"id":"https://openalex.org/C48044578","wikidata":"https://www.wikidata.org/wiki/Q727490","display_name":"Scalability","level":2,"score":0.8281538486480713},{"id":"https://openalex.org/C206688291","wikidata":"https://www.wikidata.org/wiki/Q7617819","display_name":"Stochastic gradient descent","level":3,"score":0.7307239174842834},{"id":"https://openalex.org/C26713055","wikidata":"https://www.wikidata.org/wiki/Q245962","display_name":"Implementation","level":2,"score":0.6787097454071045},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.5812759399414062},{"id":"https://openalex.org/C2776257435","wikidata":"https://www.wikidata.org/wiki/Q1576430","display_name":"Bandwidth (computing)","level":2,"score":0.5424573421478271},{"id":"https://openalex.org/C120314980","wikidata":"https://www.wikidata.org/wiki/Q180634","display_name":"Distributed computing","level":1,"score":0.48434388637542725},{"id":"https://openalex.org/C2777303404","wikidata":"https://www.wikidata.org/wiki/Q759757","display_name":"Convergence (economics)","level":2,"score":0.4837039113044739},{"id":"https://openalex.org/C854659","wikidata":"https://www.wikidata.org/wiki/Q1859284","display_name":"Message passing","level":2,"score":0.47451895475387573},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.47219714522361755},{"id":"https://openalex.org/C156891508","wikidata":"https://www.wikidata.org/wiki/Q1004114","display_name":"Bulk synchronous parallel","level":3,"score":0.4472646117210388},{"id":"https://openalex.org/C120373497","wikidata":"https://www.wikidata.org/wiki/Q1087987","display_name":"Parallel algorithm","level":2,"score":0.32651257514953613},{"id":"https://openalex.org/C31258907","wikidata":"https://www.wikidata.org/wiki/Q1301371","display_name":"Computer network","level":1,"score":0.23645302653312683},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.21884670853614807},{"id":"https://openalex.org/C77088390","wikidata":"https://www.wikidata.org/wiki/Q8513","display_name":"Database","level":1,"score":0.0},{"id":"https://openalex.org/C50522688","wikidata":"https://www.wikidata.org/wiki/Q189833","display_name":"Economic growth","level":1,"score":0.0},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3126686.3126749","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3126686.3126749","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the on Thematic Workshops of ACM Multimedia 2017","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":33,"referenced_works":["https://openalex.org/W1526114793","https://openalex.org/W1558115001","https://openalex.org/W1583837637","https://openalex.org/W1825216778","https://openalex.org/W2007589459","https://openalex.org/W2028191548","https://openalex.org/W2060393849","https://openalex.org/W2127941149","https://openalex.org/W2130062883","https://openalex.org/W2131613942","https://openalex.org/W2132737349","https://openalex.org/W2141107779","https://openalex.org/W2146502635","https://openalex.org/W2152783013","https://openalex.org/W2155893237","https://openalex.org/W2157462866","https://openalex.org/W2162390675","https://openalex.org/W2164945803","https://openalex.org/W2166706236","https://openalex.org/W2168231600","https://openalex.org/W2170796499","https://openalex.org/W2208150243","https://openalex.org/W2286848013","https://openalex.org/W2298503502","https://openalex.org/W2339765813","https://openalex.org/W2407022425","https://openalex.org/W2577965184","https://openalex.org/W2951781666","https://openalex.org/W2952033860","https://openalex.org/W2964181194","https://openalex.org/W2998508934","https://openalex.org/W3004511899","https://openalex.org/W3150273738"],"related_works":["https://openalex.org/W2039229847","https://openalex.org/W55423022","https://openalex.org/W2357113990","https://openalex.org/W1981558729","https://openalex.org/W4294310378","https://openalex.org/W49536250","https://openalex.org/W2013206009","https://openalex.org/W2383294421","https://openalex.org/W3082558912","https://openalex.org/W188021169"],"abstract_inverted_index":{"We":[0,148],"consider":[1],"the":[2,8,16,83,97,108,113,116,156,166],"problem":[3],"of":[4,10,19,40,99,110,115,170],"how":[5],"to":[6,74,82,103,131,152],"reduce":[7,159],"cost":[9,98,114],"communication":[11,36,160],"that":[12,86,140,155],"is":[13,80,101,107],"required":[14],"for":[15,44,49,70],"parallel":[17,59],"training":[18],"a":[20,67],"neural":[21],"network.":[22],"The":[23],"state-of-the-art":[24],"method,":[25],"Bulk":[26],"Synchronous":[27],"Parallel":[28],"Stochastic":[29],"Gradient":[30],"Descent":[31],"(BSP-SGD),":[32],"requires":[33],"many":[34],"collective":[35,71],"operations,":[37,72],"like":[38,124],"broadcasts":[39],"parameters":[41],"or":[42],"reductions":[43],"partial":[45],"gradient":[46],"aggregations,":[47],"which":[48],"large":[50],"messages":[51],"quickly":[52],"dominates":[53],"overall":[54],"execution":[55],"time":[56],"and":[57,90],"limits":[58],"scalability.":[60],"To":[61],"address":[62],"this":[63],"problem,":[64],"we":[65],"develop":[66],"new":[68],"technique":[69],"referred":[73],"as":[75],"Linear":[76],"Pipelining":[77],"(LP).":[78],"It":[79],"tuned":[81],"message":[84],"sizes":[85],"arise":[87],"in":[88,162],"BSP-SGD,":[89,153],"works":[91],"effectively":[92],"on":[93],"multi-GPU":[94],"systems.":[95],"Theoretically,":[96],"LP":[100,127],"invariant":[102],"P,":[104],"where":[105],"P":[106],"number":[109],"GPUs,":[111],"while":[112,164],"more":[117],"conventional":[118],"Minimum":[119],"Spanning":[120],"Tree":[121],"(MST)":[122],"scales":[123],"O(log":[125],"P).":[126],"also":[128],"demonstrates":[129],"up":[130],"2x":[132],"higher":[133],"bandwidth":[134],"than":[135],"Bidirectional":[136],"Exchange":[137],"(BE)":[138],"techniques":[139],"are":[141],"widely":[142],"adopted":[143],"by":[144],"current":[145],"MPI":[146],"implementations.":[147],"apply":[149],"these":[150],"collectives":[151],"showing":[154],"proposed":[157],"implementations":[158],"bottlenecks":[161],"practice":[163],"preserving":[165],"attractive":[167],"convergence":[168],"properties":[169],"BSP-SGD.":[171]},"counts_by_year":[{"year":2025,"cited_by_count":1},{"year":2024,"cited_by_count":1},{"year":2023,"cited_by_count":1},{"year":2021,"cited_by_count":2},{"year":2020,"cited_by_count":1},{"year":2019,"cited_by_count":4},{"year":2018,"cited_by_count":5}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
