{"id":"https://openalex.org/W4387007202","doi":"https://doi.org/10.1145/3603165.3607399","title":"Augmenting Distributed AI Training with Loss-tolerant Transmission","display_name":"Augmenting Distributed AI Training with Loss-tolerant Transmission","publication_year":2023,"publication_date":"2023-07-28","ids":{"openalex":"https://openalex.org/W4387007202","doi":"https://doi.org/10.1145/3603165.3607399"},"language":"en","primary_location":{"id":"doi:10.1145/3603165.3607399","is_oa":false,"landing_page_url":"http://dx.doi.org/10.1145/3603165.3607399","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the ACM Turing Award Celebration Conference - China 2023","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5101458538","display_name":"Zixuan Chen","orcid":"https://orcid.org/0000-0002-0126-0387"},"institutions":[{"id":"https://openalex.org/I24943067","display_name":"Fudan University","ror":"https://ror.org/013q1eq08","country_code":"CN","type":"education","lineage":["https://openalex.org/I24943067"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Zixuan Chen","raw_affiliation_strings":["School of Computer Science, Fudan University, China"],"affiliations":[{"raw_affiliation_string":"School of Computer Science, Fudan University, China","institution_ids":["https://openalex.org/I24943067"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101499716","display_name":"Lei Shi","orcid":"https://orcid.org/0009-0004-0649-5443"},"institutions":[{"id":"https://openalex.org/I24943067","display_name":"Fudan University","ror":"https://ror.org/013q1eq08","country_code":"CN","type":"education","lineage":["https://openalex.org/I24943067"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Lei Shi","raw_affiliation_strings":["School of Computer Science, Fudan University, China"],"affiliations":[{"raw_affiliation_string":"School of Computer Science, Fudan University, China","institution_ids":["https://openalex.org/I24943067"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5102721179","display_name":"Yongbo Gao","orcid":"https://orcid.org/0009-0008-9291-0605"},"institutions":[{"id":"https://openalex.org/I24943067","display_name":"Fudan University","ror":"https://ror.org/013q1eq08","country_code":"CN","type":"education","lineage":["https://openalex.org/I24943067"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yongbo Gao","raw_affiliation_strings":["School of Computer Science, Fudan University, China"],"affiliations":[{"raw_affiliation_string":"School of Computer Science, Fudan University, China","institution_ids":["https://openalex.org/I24943067"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5102778994","display_name":"Xuandong Liu","orcid":"https://orcid.org/0009-0002-6521-6200"},"institutions":[{"id":"https://openalex.org/I24943067","display_name":"Fudan University","ror":"https://ror.org/013q1eq08","country_code":"CN","type":"education","lineage":["https://openalex.org/I24943067"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xuandong Liu","raw_affiliation_strings":["School of Computer Science, Fudan University, China"],"affiliations":[{"raw_affiliation_string":"School of Computer Science, Fudan University, China","institution_ids":["https://openalex.org/I24943067"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5039807057","display_name":"Xin Ai","orcid":"https://orcid.org/0000-0002-7870-9867"},"institutions":[{"id":"https://openalex.org/I24943067","display_name":"Fudan University","ror":"https://ror.org/013q1eq08","country_code":"CN","type":"education","lineage":["https://openalex.org/I24943067"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xin Ai","raw_affiliation_strings":["School of Computer Science, Fudan University, China"],"affiliations":[{"raw_affiliation_string":"School of Computer Science, Fudan University, China","institution_ids":["https://openalex.org/I24943067"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100358497","display_name":"Sen Liu","orcid":"https://orcid.org/0000-0003-2230-7671"},"institutions":[{"id":"https://openalex.org/I24943067","display_name":"Fudan University","ror":"https://ror.org/013q1eq08","country_code":"CN","type":"education","lineage":["https://openalex.org/I24943067"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Sen Liu","raw_affiliation_strings":["School of Computer Science, Fudan University, China and Institute of FinTech, Fudan University, China"],"affiliations":[{"raw_affiliation_string":"School of Computer Science, Fudan University, China and Institute of FinTech, Fudan University, China","institution_ids":["https://openalex.org/I24943067"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5014490044","display_name":"Yang Xu","orcid":"https://orcid.org/0000-0002-0958-8547"},"institutions":[{"id":"https://openalex.org/I24943067","display_name":"Fudan University","ror":"https://ror.org/013q1eq08","country_code":"CN","type":"education","lineage":["https://openalex.org/I24943067"]},{"id":"https://openalex.org/I4210136793","display_name":"Peng Cheng Laboratory","ror":"https://ror.org/03qdqbt06","country_code":"CN","type":"facility","lineage":["https://openalex.org/I4210136793"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yang Xu","raw_affiliation_strings":["School of Computer Science, Fudan University, China and Peng Cheng Laboratory, China"],"affiliations":[{"raw_affiliation_string":"School of Computer Science, Fudan University, China and Peng Cheng Laboratory, China","institution_ids":["https://openalex.org/I4210136793","https://openalex.org/I24943067"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":7,"corresponding_author_ids":["https://openalex.org/A5101458538"],"corresponding_institution_ids":["https://openalex.org/I24943067"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.11922826,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"65","last_page":"66"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11612","display_name":"Stochastic Gradient Optimization Techniques","score":0.9975000023841858,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11612","display_name":"Stochastic Gradient Optimization Techniques","score":0.9975000023841858,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10502","display_name":"Advanced Memory and Neural Computing","score":0.996999979019165,"subfield":{"id":"https://openalex.org/subfields/2208","display_name":"Electrical and Electronic Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T13553","display_name":"Age of Information Optimization","score":0.9936000108718872,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/retransmission","display_name":"Retransmission","score":0.8562207818031311},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8039633631706238},{"id":"https://openalex.org/keywords/latency","display_name":"Latency (audio)","score":0.7570366859436035},{"id":"https://openalex.org/keywords/packet-loss","display_name":"Packet loss","score":0.67817622423172},{"id":"https://openalex.org/keywords/computer-network","display_name":"Computer network","score":0.5932813882827759},{"id":"https://openalex.org/keywords/synchronization","display_name":"Synchronization (alternating current)","score":0.4789869785308838},{"id":"https://openalex.org/keywords/enhanced-data-rates-for-gsm-evolution","display_name":"Enhanced Data Rates for GSM Evolution","score":0.44115498661994934},{"id":"https://openalex.org/keywords/network-packet","display_name":"Network packet","score":0.43239080905914307},{"id":"https://openalex.org/keywords/distributed-computing","display_name":"Distributed computing","score":0.42179858684539795},{"id":"https://openalex.org/keywords/transmission","display_name":"Transmission (telecommunications)","score":0.41328728199005127},{"id":"https://openalex.org/keywords/real-time-computing","display_name":"Real-time computing","score":0.35216307640075684},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.21394741535186768},{"id":"https://openalex.org/keywords/channel","display_name":"Channel (broadcasting)","score":0.11769071221351624},{"id":"https://openalex.org/keywords/telecommunications","display_name":"Telecommunications","score":0.07425501942634583}],"concepts":[{"id":"https://openalex.org/C180611318","wikidata":"https://www.wikidata.org/wiki/Q7316902","display_name":"Retransmission","level":3,"score":0.8562207818031311},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8039633631706238},{"id":"https://openalex.org/C82876162","wikidata":"https://www.wikidata.org/wiki/Q17096504","display_name":"Latency (audio)","level":2,"score":0.7570366859436035},{"id":"https://openalex.org/C54108766","wikidata":"https://www.wikidata.org/wiki/Q391064","display_name":"Packet loss","level":3,"score":0.67817622423172},{"id":"https://openalex.org/C31258907","wikidata":"https://www.wikidata.org/wiki/Q1301371","display_name":"Computer network","level":1,"score":0.5932813882827759},{"id":"https://openalex.org/C2778562939","wikidata":"https://www.wikidata.org/wiki/Q1298791","display_name":"Synchronization (alternating current)","level":3,"score":0.4789869785308838},{"id":"https://openalex.org/C162307627","wikidata":"https://www.wikidata.org/wiki/Q204833","display_name":"Enhanced Data Rates for GSM Evolution","level":2,"score":0.44115498661994934},{"id":"https://openalex.org/C158379750","wikidata":"https://www.wikidata.org/wiki/Q214111","display_name":"Network packet","level":2,"score":0.43239080905914307},{"id":"https://openalex.org/C120314980","wikidata":"https://www.wikidata.org/wiki/Q180634","display_name":"Distributed computing","level":1,"score":0.42179858684539795},{"id":"https://openalex.org/C761482","wikidata":"https://www.wikidata.org/wiki/Q118093","display_name":"Transmission (telecommunications)","level":2,"score":0.41328728199005127},{"id":"https://openalex.org/C79403827","wikidata":"https://www.wikidata.org/wiki/Q3988","display_name":"Real-time computing","level":1,"score":0.35216307640075684},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.21394741535186768},{"id":"https://openalex.org/C127162648","wikidata":"https://www.wikidata.org/wiki/Q16858953","display_name":"Channel (broadcasting)","level":2,"score":0.11769071221351624},{"id":"https://openalex.org/C76155785","wikidata":"https://www.wikidata.org/wiki/Q418","display_name":"Telecommunications","level":1,"score":0.07425501942634583}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3603165.3607399","is_oa":false,"landing_page_url":"http://dx.doi.org/10.1145/3603165.3607399","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the ACM Turing Award Celebration Conference - China 2023","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G7291016096","display_name":null,"funder_award_id":"62150610497, 62172108, 62002066","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G7648992395","display_name":null,"funder_award_id":"23ZR1404900","funder_id":"https://openalex.org/F4320309612","funder_display_name":"Natural Science Foundation of Shanghai"}],"funders":[{"id":"https://openalex.org/F4320309612","display_name":"Natural Science Foundation of Shanghai","ror":null},{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":1,"referenced_works":["https://openalex.org/W2103110737"],"related_works":["https://openalex.org/W2157629996","https://openalex.org/W2317536633","https://openalex.org/W1567265102","https://openalex.org/W1982141291","https://openalex.org/W2265140367","https://openalex.org/W1657874591","https://openalex.org/W2385271573","https://openalex.org/W2362028771","https://openalex.org/W2978122436","https://openalex.org/W2050266259"],"abstract_inverted_index":{"Parameter":[0],"server":[1],"(PS)":[2],"communication":[3,91],"architecture":[4],"in":[5,20],"distributed":[6],"machine":[7],"learning":[8],"(DML)":[9],"systems":[10],"is":[11],"utilized":[12],"to":[13,68,74],"enhance":[14],"the":[15,55,80],"speed":[16],"of":[17,64],"model":[18],"training":[19,47,94],"data":[21],"centers":[22],"(DCs)":[23],"and":[24,39,72,93],"edge":[25],"nodes.":[26],"However,":[27],"it":[28],"faces":[29],"severe":[30],"long-tail":[31],"latency":[32,92],"caused":[33],"by":[34],"many-to-one":[35],"\"incast\"":[36],"traffic":[37],"patterns":[38],"suffers":[40],"from":[41],"non-congestion":[42],"packet":[43],"loss,":[44],"negatively":[45],"impacting":[46],"throughput.":[48],"To":[49],"address":[50],"this":[51],"challenge,":[52],"we":[53],"design":[54],"Loss-tolerant":[56],"Transmission":[57],"Protocol":[58],"(LTP),":[59],"which":[60],"permits":[61],"partial":[62],"loss":[63],"gradients":[65],"during":[66],"synchronization":[67,76],"avoid":[69],"unneeded":[70],"retransmission":[71],"contributes":[73],"faster":[75],"per":[77],"iteration.":[78],"Moreover,":[79],"preliminary":[81],"evaluation":[82],"shows":[83],"that":[84],"LTP":[85],"outperforms":[86],"other":[87],"schemes":[88],"on":[89],"both":[90],"accuracy.":[95]},"counts_by_year":[],"updated_date":"2025-12-23T23:11:35.936235","created_date":"2025-10-10T00:00:00"}
