{"id":"https://openalex.org/W4383995751","doi":"https://doi.org/10.1145/3605573.3605650","title":"OSP: Boosting Distributed Model Training with 2-stage Synchronization","display_name":"OSP: Boosting Distributed Model Training with 2-stage Synchronization","publication_year":2023,"publication_date":"2023-08-07","ids":{"openalex":"https://openalex.org/W4383995751","doi":"https://doi.org/10.1145/3605573.3605650"},"language":"en","primary_location":{"id":"doi:10.1145/3605573.3605650","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3605573.3605650","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 52nd International Conference on Parallel Processing","raw_type":"proceedings-article"},"type":"preprint","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5101458538","display_name":"Zixuan Chen","orcid":"https://orcid.org/0000-0002-0126-0387"},"institutions":[{"id":"https://openalex.org/I24943067","display_name":"Fudan University","ror":"https://ror.org/013q1eq08","country_code":"CN","type":"education","lineage":["https://openalex.org/I24943067"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Zixuan Chen","raw_affiliation_strings":["Fudan University, China"],"affiliations":[{"raw_affiliation_string":"Fudan University, China","institution_ids":["https://openalex.org/I24943067"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101499716","display_name":"Lei Shi","orcid":"https://orcid.org/0009-0004-0649-5443"},"institutions":[{"id":"https://openalex.org/I24943067","display_name":"Fudan University","ror":"https://ror.org/013q1eq08","country_code":"CN","type":"education","lineage":["https://openalex.org/I24943067"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Lei Shi","raw_affiliation_strings":["Fudan University, China"],"affiliations":[{"raw_affiliation_string":"Fudan University, China","institution_ids":["https://openalex.org/I24943067"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5102778994","display_name":"Xuandong Liu","orcid":"https://orcid.org/0009-0002-6521-6200"},"institutions":[{"id":"https://openalex.org/I24943067","display_name":"Fudan University","ror":"https://ror.org/013q1eq08","country_code":"CN","type":"education","lineage":["https://openalex.org/I24943067"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xuandong Liu","raw_affiliation_strings":["Fudan University, China"],"affiliations":[{"raw_affiliation_string":"Fudan University, China","institution_ids":["https://openalex.org/I24943067"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100462961","display_name":"Jiahui Li","orcid":"https://orcid.org/0000-0002-6559-188X"},"institutions":[{"id":"https://openalex.org/I24943067","display_name":"Fudan University","ror":"https://ror.org/013q1eq08","country_code":"CN","type":"education","lineage":["https://openalex.org/I24943067"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jiahui Li","raw_affiliation_strings":["Fudan University, China"],"affiliations":[{"raw_affiliation_string":"Fudan University, China","institution_ids":["https://openalex.org/I24943067"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100358497","display_name":"Sen Liu","orcid":"https://orcid.org/0000-0003-2230-7671"},"institutions":[{"id":"https://openalex.org/I24943067","display_name":"Fudan University","ror":"https://ror.org/013q1eq08","country_code":"CN","type":"education","lineage":["https://openalex.org/I24943067"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Sen Liu","raw_affiliation_strings":["School of Computer Science, Fudan University, China and Institute of FinTech, Fudan University, China"],"affiliations":[{"raw_affiliation_string":"School of Computer Science, Fudan University, China and Institute of FinTech, Fudan University, China","institution_ids":["https://openalex.org/I24943067"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5014490044","display_name":"Yang Xu","orcid":"https://orcid.org/0000-0002-0958-8547"},"institutions":[{"id":"https://openalex.org/I24943067","display_name":"Fudan University","ror":"https://ror.org/013q1eq08","country_code":"CN","type":"education","lineage":["https://openalex.org/I24943067"]},{"id":"https://openalex.org/I4210136793","display_name":"Peng Cheng Laboratory","ror":"https://ror.org/03qdqbt06","country_code":"CN","type":"facility","lineage":["https://openalex.org/I4210136793"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yang Xu","raw_affiliation_strings":["Fudan University, China and Peng Cheng Laboratory, China"],"affiliations":[{"raw_affiliation_string":"Fudan University, China and Peng Cheng Laboratory, China","institution_ids":["https://openalex.org/I4210136793","https://openalex.org/I24943067"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5101458538"],"corresponding_institution_ids":["https://openalex.org/I24943067"],"apc_list":null,"apc_paid":null,"fwci":0.8698,"has_fulltext":false,"cited_by_count":5,"citation_normalized_percentile":{"value":0.78555097,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":99},"biblio":{"volume":null,"issue":null,"first_page":"102","last_page":"111"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12072","display_name":"Machine Learning and Algorithms","score":0.9955000281333923,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T12072","display_name":"Machine Learning and Algorithms","score":0.9955000281333923,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9914000034332275,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.9735999703407288,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/boosting","display_name":"Boosting (machine learning)","score":0.920757532119751},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7242220044136047},{"id":"https://openalex.org/keywords/synchronization","display_name":"Synchronization (alternating current)","score":0.5729258060455322},{"id":"https://openalex.org/keywords/training","display_name":"Training (meteorology)","score":0.49114343523979187},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.46632057428359985},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.3369755148887634},{"id":"https://openalex.org/keywords/computer-network","display_name":"Computer network","score":0.14686426520347595}],"concepts":[{"id":"https://openalex.org/C46686674","wikidata":"https://www.wikidata.org/wiki/Q466303","display_name":"Boosting (machine learning)","level":2,"score":0.920757532119751},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7242220044136047},{"id":"https://openalex.org/C2778562939","wikidata":"https://www.wikidata.org/wiki/Q1298791","display_name":"Synchronization (alternating current)","level":3,"score":0.5729258060455322},{"id":"https://openalex.org/C2777211547","wikidata":"https://www.wikidata.org/wiki/Q17141490","display_name":"Training (meteorology)","level":2,"score":0.49114343523979187},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.46632057428359985},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.3369755148887634},{"id":"https://openalex.org/C31258907","wikidata":"https://www.wikidata.org/wiki/Q1301371","display_name":"Computer network","level":1,"score":0.14686426520347595},{"id":"https://openalex.org/C153294291","wikidata":"https://www.wikidata.org/wiki/Q25261","display_name":"Meteorology","level":1,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C127162648","wikidata":"https://www.wikidata.org/wiki/Q16858953","display_name":"Channel (broadcasting)","level":2,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3605573.3605650","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3605573.3605650","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 52nd International Conference on Parallel Processing","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G7291016096","display_name":null,"funder_award_id":"62150610497, 62172108, 62002066","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G7648992395","display_name":null,"funder_award_id":"23ZR1404900","funder_id":"https://openalex.org/F4320309612","funder_display_name":"Natural Science Foundation of Shanghai"}],"funders":[{"id":"https://openalex.org/F4320309612","display_name":"Natural Science Foundation of Shanghai","ror":null},{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":22,"referenced_works":["https://openalex.org/W1975934607","https://openalex.org/W2103110737","https://openalex.org/W2108598243","https://openalex.org/W2194775991","https://openalex.org/W2747329762","https://openalex.org/W2788728386","https://openalex.org/W2920397365","https://openalex.org/W2963786636","https://openalex.org/W2964412985","https://openalex.org/W2965862774","https://openalex.org/W2970971581","https://openalex.org/W2982664135","https://openalex.org/W3089472875","https://openalex.org/W3109982791","https://openalex.org/W3124675547","https://openalex.org/W3138920078","https://openalex.org/W3176179930","https://openalex.org/W3202665973","https://openalex.org/W3204921648","https://openalex.org/W4213052788","https://openalex.org/W4316252369","https://openalex.org/W4385367428"],"related_works":["https://openalex.org/W2961085424","https://openalex.org/W4306674287","https://openalex.org/W3082059448","https://openalex.org/W4313640622","https://openalex.org/W3046775127","https://openalex.org/W3107602296","https://openalex.org/W4394896187","https://openalex.org/W3170094116","https://openalex.org/W4386462264","https://openalex.org/W4364306694"],"abstract_inverted_index":{"Distributed":[0],"deep":[1,17,145],"learning":[2,18,146],"(DDL)":[3],"is":[4,42],"a":[5,44,98,113,151],"promising":[6],"research":[7],"area,":[8],"which":[9,108],"aims":[10],"to":[11,35,59,78,123,162,171],"increase":[12],"the":[13,28,37,86],"efficiency":[14],"of":[15,23,31,49,70,88,133],"training":[16],"tasks":[19],"with":[20,112,150],"large":[21],"size":[22],"datasets":[24,149],"and":[25,52,81,117,140,148],"models.":[26,174],"As":[27],"computation":[29],"capability":[30],"DDL":[32],"nodes":[33,41],"continues":[34],"increase,":[36],"network":[38],"connection":[39],"between":[40],"becoming":[43],"major":[45],"bottleneck.":[46],"Various":[47],"methods":[48,71],"gradient":[50],"compression":[51],"improved":[53],"model":[54,89,100],"synchronization":[55,101,115,173],"have":[56,82],"been":[57,136],"proposed":[58],"address":[60,93],"this":[61],"bottleneck":[62],"in":[63,74,165],"Parameter-Server-based":[64],"DDL.":[65],"However,":[66],"these":[67,94],"two":[68],"types":[69],"can":[72,159],"result":[73],"accuracy":[75,125,168],"loss":[76,126,169],"due":[77],"discarded":[79],"gradients":[80],"limited":[83],"enhancement":[84],"on":[85,142],"throughput":[87,166],"synchronization,":[90],"respectively.":[91],"To":[92],"challenges,":[95],"we":[96],"propose":[97],"new":[99],"method":[102],"named":[103],"Overlapped":[104],"Synchronization":[105],"Parallel":[106],"(OSP),":[107],"achieves":[109],"efficient":[110],"communication":[111],"2-stage":[114],"approach":[116],"uses":[118],"Local-Gradient-based":[119],"Parameter":[120],"correction":[121],"(LGP)":[122],"avoid":[124],"caused":[127],"by":[128],"stale":[129],"parameters.":[130],"The":[131],"prototype":[132],"OSP":[134,158],"has":[135],"implemented":[137],"using":[138],"PyTorch":[139],"evaluated":[141],"commonly":[143],"used":[144],"models":[147],"9-node":[152],"testbed.":[153],"Evaluation":[154],"results":[155],"show":[156],"that":[157],"achieve":[160],"up":[161],"50%":[163],"improvement":[164],"without":[167],"compared":[170],"popular":[172]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":3},{"year":2024,"cited_by_count":1}],"updated_date":"2026-03-27T05:58:40.876381","created_date":"2025-10-10T00:00:00"}
