{"id":"https://openalex.org/W7161807340","doi":"https://doi.org/10.1109/tnse.2026.3695132","title":"Gsched: Coordinated Flow-Control and Priority Scheduling for DNN Training in AI Cluster","display_name":"Gsched: Coordinated Flow-Control and Priority Scheduling for DNN Training in AI Cluster","publication_year":2026,"publication_date":"2026-01-01","ids":{"openalex":"https://openalex.org/W7161807340","doi":"https://doi.org/10.1109/tnse.2026.3695132"},"language":null,"primary_location":{"id":"doi:10.1109/tnse.2026.3695132","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tnse.2026.3695132","pdf_url":null,"source":{"id":"https://openalex.org/S2484352698","display_name":"IEEE Transactions on Network Science and Engineering","issn_l":"2327-4697","issn":["2327-4697","2334-329X"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Network Science and Engineering","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5136532950","display_name":"Heng Xu","orcid":null},"institutions":[{"id":"https://openalex.org/I3131625388","display_name":"University Town of Shenzhen","ror":"https://ror.org/05f5j6225","country_code":"CN","type":"education","lineage":["https://openalex.org/I3131625388"]},{"id":"https://openalex.org/I4210152380","display_name":"Shenzhen Technology University","ror":"https://ror.org/04qzpec27","country_code":"CN","type":"education","lineage":["https://openalex.org/I4210152380"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Heng Xu","raw_affiliation_strings":["Faculty of Computer Science, Artificial Intelligence, Shenzhen University of Advanced Technology, Shenzhen, China"],"raw_orcid":"https://orcid.org/0009-0001-1599-8200","affiliations":[{"raw_affiliation_string":"Faculty of Computer Science, Artificial Intelligence, Shenzhen University of Advanced Technology, Shenzhen, China","institution_ids":["https://openalex.org/I4210152380","https://openalex.org/I3131625388"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5126735484","display_name":"Chengze Du","orcid":null},"institutions":[{"id":"https://openalex.org/I3131625388","display_name":"University Town of Shenzhen","ror":"https://ror.org/05f5j6225","country_code":"CN","type":"education","lineage":["https://openalex.org/I3131625388"]},{"id":"https://openalex.org/I4210152380","display_name":"Shenzhen Technology University","ror":"https://ror.org/04qzpec27","country_code":"CN","type":"education","lineage":["https://openalex.org/I4210152380"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Chengze Du","raw_affiliation_strings":["Faculty of Computer Science, Artificial Intelligence, Shenzhen University of Advanced Technology, Shenzhen, China"],"raw_orcid":"https://orcid.org/0009-0005-5313-7750","affiliations":[{"raw_affiliation_string":"Faculty of Computer Science, Artificial Intelligence, Shenzhen University of Advanced Technology, Shenzhen, China","institution_ids":["https://openalex.org/I4210152380","https://openalex.org/I3131625388"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5136514145","display_name":"Zhiwei Yu","orcid":null},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Zhiwei Yu","raw_affiliation_strings":["Institute for Network Sciences, Cyberspace, Tsinghua University, Beijing, China"],"raw_orcid":"https://orcid.org/0009-0009-7776-4985","affiliations":[{"raw_affiliation_string":"Institute for Network Sciences, Cyberspace, Tsinghua University, Beijing, China","institution_ids":["https://openalex.org/I99065089"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101847187","display_name":"Li Li","orcid":"https://orcid.org/0000-0002-2803-3803"},"institutions":[{"id":"https://openalex.org/I177725633","display_name":"Chinese University of Hong Kong","ror":"https://ror.org/00t33hh48","country_code":"HK","type":"education","lineage":["https://openalex.org/I177725633"]}],"countries":["HK"],"is_corresponding":false,"raw_author_name":"Letian Li","raw_affiliation_strings":["Department of Information Engineering, Chinese University of Hong Kong, Shatin, Hong Kong"],"raw_orcid":"https://orcid.org/0000-0001-6856-1785","affiliations":[{"raw_affiliation_string":"Department of Information Engineering, Chinese University of Hong Kong, Shatin, Hong Kong","institution_ids":["https://openalex.org/I177725633"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5136529332","display_name":"Ying Zhou","orcid":null},"institutions":[{"id":"https://openalex.org/I3131625388","display_name":"University Town of Shenzhen","ror":"https://ror.org/05f5j6225","country_code":"CN","type":"education","lineage":["https://openalex.org/I3131625388"]},{"id":"https://openalex.org/I4210152380","display_name":"Shenzhen Technology University","ror":"https://ror.org/04qzpec27","country_code":"CN","type":"education","lineage":["https://openalex.org/I4210152380"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Ying Zhou","raw_affiliation_strings":["Faculty of Computer Science, Artificial Intelligence, Shenzhen University of Advanced Technology, Shenzhen, China"],"raw_orcid":"https://orcid.org/0009-0001-9803-262X","affiliations":[{"raw_affiliation_string":"Faculty of Computer Science, Artificial Intelligence, Shenzhen University of Advanced Technology, Shenzhen, China","institution_ids":["https://openalex.org/I4210152380","https://openalex.org/I3131625388"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5136573210","display_name":"Bo Liu","orcid":null},"institutions":[{"id":"https://openalex.org/I3131625388","display_name":"University Town of Shenzhen","ror":"https://ror.org/05f5j6225","country_code":"CN","type":"education","lineage":["https://openalex.org/I3131625388"]},{"id":"https://openalex.org/I4210152380","display_name":"Shenzhen Technology University","ror":"https://ror.org/04qzpec27","country_code":"CN","type":"education","lineage":["https://openalex.org/I4210152380"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Bo Liu","raw_affiliation_strings":["Faculty of Computer Science, Artificial Intelligence, Shenzhen University of Advanced Technology, Shenzhen, China"],"raw_orcid":"https://orcid.org/0000-0002-8695-7342","affiliations":[{"raw_affiliation_string":"Faculty of Computer Science, Artificial Intelligence, Shenzhen University of Advanced Technology, Shenzhen, China","institution_ids":["https://openalex.org/I4210152380","https://openalex.org/I3131625388"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5136599052","display_name":"Jialong Li","orcid":null},"institutions":[{"id":"https://openalex.org/I3131625388","display_name":"University Town of Shenzhen","ror":"https://ror.org/05f5j6225","country_code":"CN","type":"education","lineage":["https://openalex.org/I3131625388"]},{"id":"https://openalex.org/I4210152380","display_name":"Shenzhen Technology University","ror":"https://ror.org/04qzpec27","country_code":"CN","type":"education","lineage":["https://openalex.org/I4210152380"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jialong Li","raw_affiliation_strings":["Faculty of Computer Science, Artificial Intelligence, Shenzhen University of Advanced Technology, Shenzhen, China"],"raw_orcid":"https://orcid.org/0000-0003-3416-5551","affiliations":[{"raw_affiliation_string":"Faculty of Computer Science, Artificial Intelligence, Shenzhen University of Advanced Technology, Shenzhen, China","institution_ids":["https://openalex.org/I4210152380","https://openalex.org/I3131625388"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":7,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.80126875,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":"13","issue":null,"first_page":"9572","last_page":"9590"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.30790001153945923,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.30790001153945923,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11902","display_name":"Intelligent Tutoring Systems and Adaptive Learning","score":0.04309999942779541,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12101","display_name":"Advanced Bandit Algorithms Research","score":0.03229999914765358,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/scheduling","display_name":"Scheduling (production processes)","score":0.6572999954223633},{"id":"https://openalex.org/keywords/training","display_name":"Training (meteorology)","score":0.6444000005722046},{"id":"https://openalex.org/keywords/cluster","display_name":"Cluster (spacecraft)","score":0.45339998602867126},{"id":"https://openalex.org/keywords/processor-scheduling","display_name":"Processor scheduling","score":0.3813999891281128},{"id":"https://openalex.org/keywords/job-shop-scheduling","display_name":"Job shop scheduling","score":0.32850000262260437},{"id":"https://openalex.org/keywords/artificial-neural-network","display_name":"Artificial neural network","score":0.3199999928474426}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.792900025844574},{"id":"https://openalex.org/C206729178","wikidata":"https://www.wikidata.org/wiki/Q2271896","display_name":"Scheduling (production processes)","level":2,"score":0.6572999954223633},{"id":"https://openalex.org/C2777211547","wikidata":"https://www.wikidata.org/wiki/Q17141490","display_name":"Training (meteorology)","level":2,"score":0.6444000005722046},{"id":"https://openalex.org/C164866538","wikidata":"https://www.wikidata.org/wiki/Q367351","display_name":"Cluster (spacecraft)","level":2,"score":0.45339998602867126},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.44530001282691956},{"id":"https://openalex.org/C79403827","wikidata":"https://www.wikidata.org/wiki/Q3988","display_name":"Real-time computing","level":1,"score":0.42309999465942383},{"id":"https://openalex.org/C2984822820","wikidata":"https://www.wikidata.org/wiki/Q1123036","display_name":"Processor scheduling","level":3,"score":0.3813999891281128},{"id":"https://openalex.org/C120314980","wikidata":"https://www.wikidata.org/wiki/Q180634","display_name":"Distributed computing","level":1,"score":0.3767000138759613},{"id":"https://openalex.org/C31258907","wikidata":"https://www.wikidata.org/wiki/Q1301371","display_name":"Computer network","level":1,"score":0.35659998655319214},{"id":"https://openalex.org/C55416958","wikidata":"https://www.wikidata.org/wiki/Q6206757","display_name":"Job shop scheduling","level":3,"score":0.32850000262260437},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.3199999928474426},{"id":"https://openalex.org/C107568181","wikidata":"https://www.wikidata.org/wiki/Q5319000","display_name":"Dynamic priority scheduling","level":3,"score":0.3125},{"id":"https://openalex.org/C31689143","wikidata":"https://www.wikidata.org/wiki/Q733809","display_name":"Fair-share scheduling","level":3,"score":0.2879999876022339},{"id":"https://openalex.org/C51632099","wikidata":"https://www.wikidata.org/wiki/Q3985153","display_name":"Training set","level":2,"score":0.2870999872684479},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.2565000057220459},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.2554999887943268}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/tnse.2026.3695132","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tnse.2026.3695132","pdf_url":null,"source":{"id":"https://openalex.org/S2484352698","display_name":"IEEE Transactions on Network Science and Engineering","issn_l":"2327-4697","issn":["2327-4697","2334-329X"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Network Science and Engineering","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Distributed":[0],"deep":[1],"neural":[2],"network":[3,20,128],"(DNN)":[4],"training":[5],"in":[6,146,162],"multi-tenant":[7],"cloud":[8],"environments":[9],"generates":[10],"long-lived,":[11],"bandwidth-intensive,":[12],"and":[13,42,65,85,143],"interdependent":[14],"flows":[15],"that":[16,27,95,104,157],"challenge":[17],"traditional":[18],"fairness-based":[19],"scheduling.":[21,71],"We":[22],"present":[23],"Gsched,":[24],"a":[25,74,86,113,117,154],"framework":[26],"orchestrates":[28],"centralized":[29,87],"planning":[30],"with":[31],"hardware":[32],"priority":[33,70],"enforcement":[34],"to":[35,67,78,89,112,136],"realize":[36],"communication":[37],"interleaving,":[38],"thereby":[39],"eliminating":[40],"contention":[41],"maximizing":[43],"pipeline":[44],"efficiency.":[45],"Unlike":[46],"methods":[47],"relying":[48],"on":[49],"complex":[50],"rate":[51],"control":[52],"or":[53],"custom":[54],"switch":[55,61],"hardware,":[56],"Gsched":[57,132,152],"leverages":[58],"standard":[59],"commodity":[60],"features":[62],"like":[63],"DSCP":[64],"SPQ":[66],"enforce":[68],"strict":[69],"It":[72],"employs":[73],"cold-start":[75],"profiling":[76],"mechanism":[77],"capture":[79],"task":[80],"characteristics":[81],"without":[82],"user":[83],"intervention,":[84],"solver":[88],"compute":[90],"the":[91,110,137],"optimal":[92],"interleaving":[93],"order":[94],"maximizes":[96],"Average":[97],"Transmission":[98],"Efficiency":[99],"(ATE).":[100],"Our":[101],"analysis":[102],"proves":[103],"this":[105],"prioritization":[106],"strategy":[107],"deterministically":[108],"drives":[109],"system":[111],"contention-free":[114],"state,":[115],"while":[116],"local":[118],"perturbation":[119],"theorem":[120],"guarantees":[121],"robustness":[122],"against":[123],"distributed":[124],"execution":[125],"conflicts":[126],"via":[127],"backpressure.":[129],"Across":[130],"workloads,":[131],"achieves":[133],"performance":[134],"close":[135],"state-of-the-art":[138],"Crux":[139],"under":[140],"heterogeneous":[141],"workloads":[142],"remains":[144],"competitive":[145],"highly":[147],"diverse":[148],"cases.":[149],"More":[150],"importantly,":[151],"provides":[153],"theoretical":[155],"foundation":[156],"justifies":[158],"scalable":[159],"per-bottleneck":[160],"scheduling":[161],"multi-bottleneck":[163],"networks.":[164]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2026-05-21T00:00:00"}
