{"id":"https://openalex.org/W3202009823","doi":"https://doi.org/10.1145/3472456.3472467","title":"Prophet: Speeding up Distributed DNN Training with Predictable Communication Scheduling","display_name":"Prophet: Speeding up Distributed DNN Training with Predictable Communication Scheduling","publication_year":2021,"publication_date":"2021-08-09","ids":{"openalex":"https://openalex.org/W3202009823","doi":"https://doi.org/10.1145/3472456.3472467","mag":"3202009823"},"language":"en","primary_location":{"id":"doi:10.1145/3472456.3472467","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3472456.3472467","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"50th International Conference on Parallel Processing","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5100744065","display_name":"Zhenwei Zhang","orcid":"https://orcid.org/0000-0002-5270-5104"},"institutions":[{"id":"https://openalex.org/I66867065","display_name":"East China Normal University","ror":"https://ror.org/02n96ep67","country_code":"CN","type":"education","lineage":["https://openalex.org/I66867065"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Zhenwei Zhang","raw_affiliation_strings":["East China Normal University, China"],"affiliations":[{"raw_affiliation_string":"East China Normal University, China","institution_ids":["https://openalex.org/I66867065"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5073533659","display_name":"Qiang Qi","orcid":null},"institutions":[{"id":"https://openalex.org/I66867065","display_name":"East China Normal University","ror":"https://ror.org/02n96ep67","country_code":"CN","type":"education","lineage":["https://openalex.org/I66867065"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Qiang Qi","raw_affiliation_strings":["East China Normal University, China"],"affiliations":[{"raw_affiliation_string":"East China Normal University, China","institution_ids":["https://openalex.org/I66867065"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5039055026","display_name":"Ruitao Shang","orcid":null},"institutions":[{"id":"https://openalex.org/I66867065","display_name":"East China Normal University","ror":"https://ror.org/02n96ep67","country_code":"CN","type":"education","lineage":["https://openalex.org/I66867065"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Ruitao Shang","raw_affiliation_strings":["East China Normal University, China"],"affiliations":[{"raw_affiliation_string":"East China Normal University, China","institution_ids":["https://openalex.org/I66867065"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100379221","display_name":"Li Chen","orcid":"https://orcid.org/0000-0002-2300-6996"},"institutions":[{"id":"https://openalex.org/I79516672","display_name":"University of Louisiana at Lafayette","ror":"https://ror.org/01x8rc503","country_code":"US","type":"education","lineage":["https://openalex.org/I2799628689","https://openalex.org/I79516672"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Li Chen","raw_affiliation_strings":["University of Louisiana at Lafayette, United States of America"],"affiliations":[{"raw_affiliation_string":"University of Louisiana at Lafayette, United States of America","institution_ids":["https://openalex.org/I79516672"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5029626585","display_name":"Fei Xu","orcid":"https://orcid.org/0000-0003-1590-5323"},"institutions":[{"id":"https://openalex.org/I66867065","display_name":"East China Normal University","ror":"https://ror.org/02n96ep67","country_code":"CN","type":"education","lineage":["https://openalex.org/I66867065"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Fei Xu","raw_affiliation_strings":["East China Normal University, China"],"affiliations":[{"raw_affiliation_string":"East China Normal University, China","institution_ids":["https://openalex.org/I66867065"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5100744065"],"corresponding_institution_ids":["https://openalex.org/I66867065"],"apc_list":null,"apc_paid":null,"fwci":0.2914,"has_fulltext":false,"cited_by_count":6,"citation_normalized_percentile":{"value":0.56490389,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":99},"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"11"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11612","display_name":"Stochastic Gradient Optimization Techniques","score":0.9975000023841858,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10764","display_name":"Privacy-Preserving Technologies in Data","score":0.996999979019165,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8925508260726929},{"id":"https://openalex.org/keywords/scheduling","display_name":"Scheduling (production processes)","score":0.6651675701141357},{"id":"https://openalex.org/keywords/distributed-computing","display_name":"Distributed computing","score":0.6199731230735779},{"id":"https://openalex.org/keywords/schedule","display_name":"Schedule","score":0.6117453575134277},{"id":"https://openalex.org/keywords/artificial-neural-network","display_name":"Artificial neural network","score":0.5682941675186157},{"id":"https://openalex.org/keywords/overhead","display_name":"Overhead (engineering)","score":0.4925580620765686},{"id":"https://openalex.org/keywords/computation","display_name":"Computation","score":0.4638291597366333},{"id":"https://openalex.org/keywords/real-time-computing","display_name":"Real-time computing","score":0.43606874346733093},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.36821824312210083},{"id":"https://openalex.org/keywords/operating-system","display_name":"Operating system","score":0.1431039273738861},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.10657674074172974}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8925508260726929},{"id":"https://openalex.org/C206729178","wikidata":"https://www.wikidata.org/wiki/Q2271896","display_name":"Scheduling (production processes)","level":2,"score":0.6651675701141357},{"id":"https://openalex.org/C120314980","wikidata":"https://www.wikidata.org/wiki/Q180634","display_name":"Distributed computing","level":1,"score":0.6199731230735779},{"id":"https://openalex.org/C68387754","wikidata":"https://www.wikidata.org/wiki/Q7271585","display_name":"Schedule","level":2,"score":0.6117453575134277},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.5682941675186157},{"id":"https://openalex.org/C2779960059","wikidata":"https://www.wikidata.org/wiki/Q7113681","display_name":"Overhead (engineering)","level":2,"score":0.4925580620765686},{"id":"https://openalex.org/C45374587","wikidata":"https://www.wikidata.org/wiki/Q12525525","display_name":"Computation","level":2,"score":0.4638291597366333},{"id":"https://openalex.org/C79403827","wikidata":"https://www.wikidata.org/wiki/Q3988","display_name":"Real-time computing","level":1,"score":0.43606874346733093},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.36821824312210083},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.1431039273738861},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.10657674074172974},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0},{"id":"https://openalex.org/C21547014","wikidata":"https://www.wikidata.org/wiki/Q1423657","display_name":"Operations management","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3472456.3472467","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3472456.3472467","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"50th International Conference on Parallel Processing","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G2426718398","display_name":null,"funder_award_id":"21ZR1419900","funder_id":"https://openalex.org/F4320309612","funder_display_name":"Natural Science Foundation of Shanghai"},{"id":"https://openalex.org/G6474270080","display_name":null,"funder_award_id":"20511102802 & 18DZ2270800","funder_id":"https://openalex.org/F4320321885","funder_display_name":"Science and Technology Commission of Shanghai Municipality"},{"id":"https://openalex.org/G8252784121","display_name":null,"funder_award_id":"61972158","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"}],"funders":[{"id":"https://openalex.org/F4320309612","display_name":"Natural Science Foundation of Shanghai","ror":null},{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"},{"id":"https://openalex.org/F4320321885","display_name":"Science and Technology Commission of Shanghai Municipality","ror":"https://ror.org/03kt66j61"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":31,"referenced_works":["https://openalex.org/W2060393849","https://openalex.org/W2113547287","https://openalex.org/W2132737349","https://openalex.org/W2163605009","https://openalex.org/W2168231600","https://openalex.org/W2186615578","https://openalex.org/W2402144811","https://openalex.org/W2526749411","https://openalex.org/W2904556356","https://openalex.org/W2920397365","https://openalex.org/W2922527104","https://openalex.org/W2944793600","https://openalex.org/W2952313066","https://openalex.org/W2962758826","https://openalex.org/W2963403751","https://openalex.org/W2963786636","https://openalex.org/W2975712713","https://openalex.org/W2979245724","https://openalex.org/W2982664135","https://openalex.org/W3008591352","https://openalex.org/W3014367186","https://openalex.org/W3037875189","https://openalex.org/W3047002910","https://openalex.org/W3047537431","https://openalex.org/W3095429755","https://openalex.org/W3096403968","https://openalex.org/W3097485584","https://openalex.org/W3098222317","https://openalex.org/W3153854431","https://openalex.org/W4288079579","https://openalex.org/W4289401659"],"related_works":["https://openalex.org/W2046435967","https://openalex.org/W4231775656","https://openalex.org/W2383646825","https://openalex.org/W2371018915","https://openalex.org/W2195904091","https://openalex.org/W2354191502","https://openalex.org/W1972225038","https://openalex.org/W3134658850","https://openalex.org/W2355938171","https://openalex.org/W2780079842"],"abstract_inverted_index":{"Optimizing":[0],"performance":[1,215,232],"for":[2],"Distributed":[3],"Deep":[4],"Neural":[5],"Network":[6],"(DDNN)":[7],"training":[8,22,43,55,195,214],"has":[9],"recently":[10],"become":[11],"increasingly":[12],"compelling,":[13],"as":[14,176,178,181],"the":[15,21,36,45,77,85,89,108,113,121,153,166,172,185,193,212,222],"DNN":[16,201],"model":[17],"gets":[18],"complex":[19],"and":[20,38,47,66,91,112,148],"dataset":[23],"grows":[24],"large.":[25],"While":[26],"existing":[27],"works":[28],"on":[29,34,204],"communication":[30,39,70,225],"scheduling":[31,71,226],"mostly":[32],"focus":[33],"overlapping":[35],"computation":[37],"to":[40,75,119,142,182,218],"improve":[41,211],"DDNN":[42,54,194,213],"performance,":[44],"GPU":[46,90,147,190],"network":[48,92,110,149,167],"resources":[49,150,191],"are":[50],"still":[51],"under-utilized":[52],"in":[53,61,80,165],"clusters.":[56],"To":[57],"tackle":[58],"this":[59,62],"issue,":[60],"paper,":[63],"we":[64],"design":[65],"implement":[67],"a":[68],"predictable":[69],"strategy":[72],"named":[73],"Prophet":[74,105,169,209],"schedule":[76],"gradient":[78,101,134,156],"transfer":[79,102,157],"an":[81],"adequate":[82],"order,":[83],"with":[84,199,221,229],"aim":[86],"of":[87,100,124,146,155,189],"maximizing":[88],"resource":[93],"utilization.":[94],"Leveraging":[95],"our":[96],"observed":[97],"stepwise":[98],"pattern":[99],"start":[103,175],"time,":[104],"first":[106],"uses":[107],"monitored":[109],"bandwidth":[111],"profiled":[114],"time":[115,188],"interval":[116],"among":[117],"gradients":[118,125,160,164],"predict":[120],"appropriate":[122],"number":[123],"that":[126,208],"can":[127,136,170,210],"be":[128,137],"grouped":[129],"into":[130],"blocks.":[131],"Then,":[132],"these":[133],"blocks":[135],"transferred":[138],"one":[139,141],"by":[140,216],"guarantee":[143],"high":[144],"utilization":[145],"while":[151],"ensuring":[152],"priority":[154],"(i.e.,":[158],"low-priority":[159],"cannot":[161],"preempt":[162],"high-priority":[163],"transfer).":[168],"make":[171],"forward":[173],"propagation":[174],"early":[177],"possible":[179],"so":[180],"greedily":[183],"reduce":[184],"waiting":[186],"(idle)":[187],"during":[192],"process.":[196],"Prototype":[197],"experiments":[198],"representative":[200],"models":[202],"trained":[203],"Amazon":[205],"EC2":[206],"demonstrate":[207],"up":[217],"40%":[219],"compared":[220],"state-of-the-art":[223],"priority-based":[224],"strategies,":[227],"yet":[228],"negligible":[230],"runtime":[231],"overhead.":[233]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":2},{"year":2024,"cited_by_count":1},{"year":2023,"cited_by_count":1},{"year":2022,"cited_by_count":1}],"updated_date":"2026-04-02T15:55:50.835912","created_date":"2025-10-10T00:00:00"}
