{"id":"https://openalex.org/W4413757132","doi":"https://doi.org/10.1145/3718958.3750514","title":"ResCCL: Resource-Efficient Scheduling for Collective Communication","display_name":"ResCCL: Resource-Efficient Scheduling for Collective Communication","publication_year":2025,"publication_date":"2025-08-27","ids":{"openalex":"https://openalex.org/W4413757132","doi":"https://doi.org/10.1145/3718958.3750514"},"language":"en","primary_location":{"id":"doi:10.1145/3718958.3750514","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3718958.3750514","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3718958.3750514","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the ACM SIGCOMM 2025 Conference","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://dl.acm.org/doi/pdf/10.1145/3718958.3750514","any_repository_has_fulltext":null},"authorships":[{"author_position":"first","author":{"id":null,"display_name":"Tongrui Liu","orcid":"https://orcid.org/0009-0007-5722-4063"},"institutions":[{"id":"https://openalex.org/I9224756","display_name":"Northeastern University","ror":"https://ror.org/03awzbc87","country_code":"CN","type":"education","lineage":["https://openalex.org/I9224756"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Tongrui Liu","raw_affiliation_strings":["Northeastern University, Shenyang, China"],"raw_orcid":"https://orcid.org/0009-0007-5722-4063","affiliations":[{"raw_affiliation_string":"Northeastern University, Shenyang, China","institution_ids":["https://openalex.org/I9224756"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5093548286","display_name":"Chenyang Hei","orcid":"https://orcid.org/0000-0001-5010-1529"},"institutions":[{"id":"https://openalex.org/I9224756","display_name":"Northeastern University","ror":"https://ror.org/03awzbc87","country_code":"CN","type":"education","lineage":["https://openalex.org/I9224756"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Chenyang Hei","raw_affiliation_strings":["Northeastern University, Shenyang, China"],"raw_orcid":"https://orcid.org/0000-0001-5010-1529","affiliations":[{"raw_affiliation_string":"Northeastern University, Shenyang, China","institution_ids":["https://openalex.org/I9224756"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5083598234","display_name":"Fuliang Li","orcid":"https://orcid.org/0000-0001-9782-0053"},"institutions":[{"id":"https://openalex.org/I9224756","display_name":"Northeastern University","ror":"https://ror.org/03awzbc87","country_code":"CN","type":"education","lineage":["https://openalex.org/I9224756"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Fuliang Li","raw_affiliation_strings":["Northeastern University, Shenyang, China"],"raw_orcid":"https://orcid.org/0000-0001-9782-0053","affiliations":[{"raw_affiliation_string":"Northeastern University, Shenyang, China","institution_ids":["https://openalex.org/I9224756"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5106856666","display_name":"Chengxi Gao","orcid":"https://orcid.org/0000-0003-1386-7394"},"institutions":[{"id":"https://openalex.org/I4210145761","display_name":"Shenzhen Institutes of Advanced Technology","ror":"https://ror.org/04gh4er46","country_code":"CN","type":"facility","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210145761"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Chengxi Gao","raw_affiliation_strings":["Shenzhen Institutes of Advanced Technology, Chinese Academy of Sciences, Shenzhen, China"],"raw_orcid":"https://orcid.org/0000-0003-1386-7394","affiliations":[{"raw_affiliation_string":"Shenzhen Institutes of Advanced Technology, Chinese Academy of Sciences, Shenzhen, China","institution_ids":["https://openalex.org/I4210145761"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5114200761","display_name":"Jiamin Cao","orcid":null},"institutions":[{"id":"https://openalex.org/I45928872","display_name":"Alibaba Group (China)","ror":"https://ror.org/00k642b80","country_code":"CN","type":"company","lineage":["https://openalex.org/I45928872"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jiamin Cao","raw_affiliation_strings":["Alibaba Cloud, Beijing, China"],"raw_orcid":"https://orcid.org/0009-0009-5468-7366","affiliations":[{"raw_affiliation_string":"Alibaba Cloud, Beijing, China","institution_ids":["https://openalex.org/I45928872"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Tianshu Wang","orcid":"https://orcid.org/0009-0008-9602-8417"},"institutions":[{"id":"https://openalex.org/I45928872","display_name":"Alibaba Group (China)","ror":"https://ror.org/00k642b80","country_code":"CN","type":"company","lineage":["https://openalex.org/I45928872"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Tianshu Wang","raw_affiliation_strings":["Alibaba Cloud, Beijing, China"],"raw_orcid":"https://orcid.org/0009-0008-9602-8417","affiliations":[{"raw_affiliation_string":"Alibaba Cloud, Beijing, China","institution_ids":["https://openalex.org/I45928872"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5103040133","display_name":"Ennan Zhai","orcid":"https://orcid.org/0000-0003-4352-7497"},"institutions":[{"id":"https://openalex.org/I45928872","display_name":"Alibaba Group (China)","ror":"https://ror.org/00k642b80","country_code":"CN","type":"company","lineage":["https://openalex.org/I45928872"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Ennan Zhai","raw_affiliation_strings":["Alibaba Cloud, Hangzhou, China"],"raw_orcid":"https://orcid.org/0000-0003-4352-7497","affiliations":[{"raw_affiliation_string":"Alibaba Cloud, Hangzhou, China","institution_ids":["https://openalex.org/I45928872"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5086590282","display_name":"X. Wang","orcid":"https://orcid.org/0000-0001-8668-3524"},"institutions":[{"id":"https://openalex.org/I9224756","display_name":"Northeastern University","ror":"https://ror.org/03awzbc87","country_code":"CN","type":"education","lineage":["https://openalex.org/I9224756"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xingwei Wang","raw_affiliation_strings":["Northeastern University, Shenyang, China"],"raw_orcid":"https://orcid.org/0000-0001-8668-3524","affiliations":[{"raw_affiliation_string":"Northeastern University, Shenyang, China","institution_ids":["https://openalex.org/I9224756"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":8,"corresponding_author_ids":[],"corresponding_institution_ids":["https://openalex.org/I9224756"],"apc_list":null,"apc_paid":null,"fwci":1.3517,"has_fulltext":true,"cited_by_count":1,"citation_normalized_percentile":{"value":0.8509347,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":91,"max":95},"biblio":{"volume":null,"issue":null,"first_page":"55","last_page":"70"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10829","display_name":"Interconnection Networks and Systems","score":0.9987000226974487,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10829","display_name":"Interconnection Networks and Systems","score":0.9987000226974487,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10772","display_name":"Distributed systems and fault tolerance","score":0.9950000047683716,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10904","display_name":"Embedded Systems Design Techniques","score":0.9876999855041504,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7359119653701782},{"id":"https://openalex.org/keywords/scheduling","display_name":"Scheduling (production processes)","score":0.563847541809082},{"id":"https://openalex.org/keywords/distributed-computing","display_name":"Distributed computing","score":0.49650317430496216},{"id":"https://openalex.org/keywords/processor-scheduling","display_name":"Processor scheduling","score":0.46330147981643677},{"id":"https://openalex.org/keywords/computer-network","display_name":"Computer network","score":0.3540242612361908},{"id":"https://openalex.org/keywords/resource","display_name":"Resource (disambiguation)","score":0.33484727144241333},{"id":"https://openalex.org/keywords/engineering","display_name":"Engineering","score":0.09309631586074829},{"id":"https://openalex.org/keywords/operations-management","display_name":"Operations management","score":0.05902513861656189}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7359119653701782},{"id":"https://openalex.org/C206729178","wikidata":"https://www.wikidata.org/wiki/Q2271896","display_name":"Scheduling (production processes)","level":2,"score":0.563847541809082},{"id":"https://openalex.org/C120314980","wikidata":"https://www.wikidata.org/wiki/Q180634","display_name":"Distributed computing","level":1,"score":0.49650317430496216},{"id":"https://openalex.org/C2984822820","wikidata":"https://www.wikidata.org/wiki/Q1123036","display_name":"Processor scheduling","level":3,"score":0.46330147981643677},{"id":"https://openalex.org/C31258907","wikidata":"https://www.wikidata.org/wiki/Q1301371","display_name":"Computer network","level":1,"score":0.3540242612361908},{"id":"https://openalex.org/C206345919","wikidata":"https://www.wikidata.org/wiki/Q20380951","display_name":"Resource (disambiguation)","level":2,"score":0.33484727144241333},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.09309631586074829},{"id":"https://openalex.org/C21547014","wikidata":"https://www.wikidata.org/wiki/Q1423657","display_name":"Operations management","level":1,"score":0.05902513861656189}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3718958.3750514","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3718958.3750514","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3718958.3750514","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the ACM SIGCOMM 2025 Conference","raw_type":"proceedings-article"}],"best_oa_location":{"id":"doi:10.1145/3718958.3750514","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3718958.3750514","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3718958.3750514","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the ACM SIGCOMM 2025 Conference","raw_type":"proceedings-article"},"sustainable_development_goals":[{"display_name":"Decent work and economic growth","score":0.44999998807907104,"id":"https://metadata.un.org/sdg/8"}],"awards":[{"id":"https://openalex.org/G218614023","display_name":null,"funder_award_id":"XLYC2403086","funder_id":"https://openalex.org/F4320329895","funder_display_name":"Liaoning Revitalization Talents Program"},{"id":"https://openalex.org/G3253723913","display_name":null,"funder_award_id":"U22B200","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G6760061280","display_name":null,"funder_award_id":"U22B2005","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G7797178789","display_name":null,"funder_award_id":"62432003","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"},{"id":"https://openalex.org/F4320329895","display_name":"Liaoning Revitalization Talents Program","ror":null}],"has_content":{"pdf":true,"grobid_xml":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4413757132.pdf","grobid_xml":"https://content.openalex.org/works/W4413757132.grobid-xml"},"referenced_works_count":20,"referenced_works":["https://openalex.org/W2021596576","https://openalex.org/W2149939304","https://openalex.org/W2152340385","https://openalex.org/W2341139018","https://openalex.org/W2745034467","https://openalex.org/W2913200956","https://openalex.org/W3006487403","https://openalex.org/W3072623287","https://openalex.org/W3129488589","https://openalex.org/W3190977274","https://openalex.org/W3193985311","https://openalex.org/W4290940003","https://openalex.org/W4318541593","https://openalex.org/W4360831831","https://openalex.org/W4395073435","https://openalex.org/W4401175629","https://openalex.org/W4401176593","https://openalex.org/W4401176820","https://openalex.org/W4401176822","https://openalex.org/W4404955085"],"related_works":["https://openalex.org/W2128410848","https://openalex.org/W2118368532","https://openalex.org/W2102390841","https://openalex.org/W3047653192","https://openalex.org/W2096289371","https://openalex.org/W2126232624","https://openalex.org/W2434525066","https://openalex.org/W3140149227","https://openalex.org/W2946406471","https://openalex.org/W2130555437"],"abstract_inverted_index":{"As":[0],"distributed":[1],"deep":[2],"learning":[3],"training":[4],"(DLT)":[5],"systems":[6],"scale,":[7],"collective":[8],"communication":[9,27,89,108],"has":[10],"become":[11],"a":[12,50],"significant":[13],"performance":[14,42,122],"bottleneck.":[15],"While":[16],"current":[17,63],"approaches":[18],"optimize":[19],"bandwidth":[20,121],"utilization":[21,139],"and":[22,78,86,106,127,136],"task":[23],"completion":[24],"time,":[25],"existing":[26],"libraries":[28],"(CCLs)":[29],"backends":[30],"fail":[31],"to":[32,58,91,117,124,156],"efficiently":[33],"manage":[34],"GPU":[35],"resources":[36],"during":[37],"algorithm":[38],"execution,":[39],"limiting":[40],"the":[41,73,98,144],"of":[43],"advanced":[44],"algorithms.":[45,146],"This":[46],"paper":[47],"proposes":[48],"ResCCL,":[49],"novel":[51],"CCL":[52],"backend":[53],"designed":[54],"for":[55],"Resource-Efficient":[56],"Scheduling":[57],"address":[59],"key":[60],"limitations":[61],"in":[62,120],"systems.":[64],"ResCCL":[65,114,150],"enhances":[66,107],"execution":[67],"efficiency":[68],"by":[69,134,140,154],"optimizing":[70],"scheduling":[71,100],"at":[72],"primitive":[74],"level":[75],"(e.g.,":[76],"send":[77],"recvReduceCopy),":[79],"enabling":[80],"flexible":[81],"thread":[82],"block":[83],"(TB)":[84],"allocation,":[85],"generating":[87],"lightweight":[88],"kernels":[90],"minimize":[92],"runtime":[93],"overhead.":[94],"Our":[95],"approach":[96],"tackles":[97],"global":[99],"problem,":[101],"reduces":[102,130],"idle":[103],"TB":[104,138],"resources,":[105],"bandwidth.":[109],"Evaluation":[110],"results":[111],"demonstrate":[112],"that":[113],"achieves":[115],"up":[116,155],"2.5\u00d7":[118],"improvement":[119],"compared":[123],"both":[125],"NCCL":[126],"MSCCL.":[128],"It":[129],"SM":[131],"resource":[132],"overhead":[133],"77.8%":[135],"increases":[137],"41.6%":[141],"while":[142],"running":[143],"same":[145],"In":[147],"end-to-end":[148],"DLT,":[149],"boosts":[151],"Megatron's":[152],"throughput":[153],"39%.":[157]},"counts_by_year":[{"year":2025,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
