{"id":"https://openalex.org/W4417070545","doi":"https://doi.org/10.1145/3769802","title":"Hydraulis: Balancing Large Transformer Model Training via Co-designing Parallel Strategies and Data Assignment","display_name":"Hydraulis: Balancing Large Transformer Model Training via Co-designing Parallel Strategies and Data Assignment","publication_year":2025,"publication_date":"2025-12-04","ids":{"openalex":"https://openalex.org/W4417070545","doi":"https://doi.org/10.1145/3769802"},"language":"en","primary_location":{"id":"doi:10.1145/3769802","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3769802","pdf_url":null,"source":{"id":"https://openalex.org/S4387289859","display_name":"Proceedings of the ACM on Management of Data","issn_l":"2836-6573","issn":["2836-6573"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the ACM on Management of Data","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5029750339","display_name":"Haoyang Li","orcid":"https://orcid.org/0009-0001-5342-0194"},"institutions":[{"id":"https://openalex.org/I20231570","display_name":"Peking University","ror":"https://ror.org/02v51f717","country_code":"CN","type":"education","lineage":["https://openalex.org/I20231570"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Haoyang Li","raw_affiliation_strings":["Peking University, Beijing, China"],"raw_orcid":"https://orcid.org/0009-0001-5342-0194","affiliations":[{"raw_affiliation_string":"Peking University, Beijing, China","institution_ids":["https://openalex.org/I20231570"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5039254679","display_name":"Fangcheng Fu","orcid":"https://orcid.org/0000-0003-1658-0380"},"institutions":[{"id":"https://openalex.org/I183067930","display_name":"Shanghai Jiao Tong University","ror":"https://ror.org/0220qvk04","country_code":"CN","type":"education","lineage":["https://openalex.org/I183067930"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Fangcheng Fu","raw_affiliation_strings":["Shanghai Jiao Tong University, Shanghai, China"],"raw_orcid":"https://orcid.org/0000-0003-1658-0380","affiliations":[{"raw_affiliation_string":"Shanghai Jiao Tong University, Shanghai, China","institution_ids":["https://openalex.org/I183067930"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5111914292","display_name":"Sheng Lin","orcid":"https://orcid.org/0009-0002-1495-0499"},"institutions":[{"id":"https://openalex.org/I20231570","display_name":"Peking University","ror":"https://ror.org/02v51f717","country_code":"CN","type":"education","lineage":["https://openalex.org/I20231570"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Sheng Lin","raw_affiliation_strings":["Peking University, Beijing, China"],"raw_orcid":"https://orcid.org/0009-0002-1495-0499","affiliations":[{"raw_affiliation_string":"Peking University, Beijing, China","institution_ids":["https://openalex.org/I20231570"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5108863265","display_name":"Houyang Ge","orcid":"https://orcid.org/0009-0001-3367-7486"},"institutions":[{"id":"https://openalex.org/I20231570","display_name":"Peking University","ror":"https://ror.org/02v51f717","country_code":"CN","type":"education","lineage":["https://openalex.org/I20231570"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Hao Ge","raw_affiliation_strings":["Peking University, Beijing, China"],"raw_orcid":"https://orcid.org/0009-0001-3367-7486","affiliations":[{"raw_affiliation_string":"Peking University, Beijing, China","institution_ids":["https://openalex.org/I20231570"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5089836510","display_name":"X. Wang","orcid":"https://orcid.org/0009-0006-6458-7033"},"institutions":[{"id":"https://openalex.org/I20231570","display_name":"Peking University","ror":"https://ror.org/02v51f717","country_code":"CN","type":"education","lineage":["https://openalex.org/I20231570"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xuanyu Wang","raw_affiliation_strings":["Peking University, Beijing, China"],"raw_orcid":"https://orcid.org/0009-0006-6458-7033","affiliations":[{"raw_affiliation_string":"Peking University, Beijing, China","institution_ids":["https://openalex.org/I20231570"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5033141830","display_name":"Jiawen Niu","orcid":"https://orcid.org/0009-0003-0922-1942"},"institutions":[{"id":"https://openalex.org/I20231570","display_name":"Peking University","ror":"https://ror.org/02v51f717","country_code":"CN","type":"education","lineage":["https://openalex.org/I20231570"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jiawen Niu","raw_affiliation_strings":["Peking University, Beijing, China"],"raw_orcid":"https://orcid.org/0009-0003-0922-1942","affiliations":[{"raw_affiliation_string":"Peking University, Beijing, China","institution_ids":["https://openalex.org/I20231570"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5025328651","display_name":"Jinbao Xue","orcid":"https://orcid.org/0009-0003-4087-9873"},"institutions":[{"id":"https://openalex.org/I2250653659","display_name":"Tencent (China)","ror":"https://ror.org/00hhjss72","country_code":"CN","type":"company","lineage":["https://openalex.org/I2250653659"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jinbao Xue","raw_affiliation_strings":["Tencent, Beijing, China"],"raw_orcid":"https://orcid.org/0009-0003-4087-9873","affiliations":[{"raw_affiliation_string":"Tencent, Beijing, China","institution_ids":["https://openalex.org/I2250653659"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5077922409","display_name":"Yangyu Tao","orcid":"https://orcid.org/0009-0003-0536-4321"},"institutions":[{"id":"https://openalex.org/I2250653659","display_name":"Tencent (China)","ror":"https://ror.org/00hhjss72","country_code":"CN","type":"company","lineage":["https://openalex.org/I2250653659"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yangyu Tao","raw_affiliation_strings":["Tencent, Beijing, China"],"raw_orcid":"https://orcid.org/0009-0003-0536-4321","affiliations":[{"raw_affiliation_string":"Tencent, Beijing, China","institution_ids":["https://openalex.org/I2250653659"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5063795716","display_name":"D. Wang","orcid":"https://orcid.org/0009-0003-2330-6854"},"institutions":[{"id":"https://openalex.org/I2250653659","display_name":"Tencent (China)","ror":"https://ror.org/00hhjss72","country_code":"CN","type":"company","lineage":["https://openalex.org/I2250653659"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Di Wang","raw_affiliation_strings":["Tencent, Beijing, China"],"raw_orcid":"https://orcid.org/0009-0003-2330-6854","affiliations":[{"raw_affiliation_string":"Tencent, Beijing, China","institution_ids":["https://openalex.org/I2250653659"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5037387576","display_name":"Jie Jiang","orcid":"https://orcid.org/0000-0001-9658-5127"},"institutions":[{"id":"https://openalex.org/I2250653659","display_name":"Tencent (China)","ror":"https://ror.org/00hhjss72","country_code":"CN","type":"company","lineage":["https://openalex.org/I2250653659"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jie Jiang","raw_affiliation_strings":["Tencent, Beijing, China"],"raw_orcid":"https://orcid.org/0000-0001-9658-5127","affiliations":[{"raw_affiliation_string":"Tencent, Beijing, China","institution_ids":["https://openalex.org/I2250653659"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5062357883","display_name":"Bin Cui","orcid":"https://orcid.org/0000-0003-1681-4677"},"institutions":[{"id":"https://openalex.org/I20231570","display_name":"Peking University","ror":"https://ror.org/02v51f717","country_code":"CN","type":"education","lineage":["https://openalex.org/I20231570"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Bin Cui","raw_affiliation_strings":["Peking University, Beijing, China"],"raw_orcid":"https://orcid.org/0000-0003-1681-4677","affiliations":[{"raw_affiliation_string":"Peking University, Beijing, China","institution_ids":["https://openalex.org/I20231570"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":11,"corresponding_author_ids":["https://openalex.org/A5029750339"],"corresponding_institution_ids":["https://openalex.org/I20231570"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.38008597,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":"3","issue":"6","first_page":"1","last_page":"30"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.19439999759197235,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.19439999759197235,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.16750000417232513,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10101","display_name":"Cloud Computing and Resource Management","score":0.06289999932050705,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/training","display_name":"Training (meteorology)","score":0.6247000098228455},{"id":"https://openalex.org/keywords/quadratic-equation","display_name":"Quadratic equation","score":0.48570001125335693},{"id":"https://openalex.org/keywords/training-set","display_name":"Training set","score":0.48019999265670776},{"id":"https://openalex.org/keywords/transformer","display_name":"Transformer","score":0.47839999198913574},{"id":"https://openalex.org/keywords/sequence","display_name":"Sequence (biology)","score":0.4668999910354614},{"id":"https://openalex.org/keywords/data-modeling","display_name":"Data modeling","score":0.3962000012397766},{"id":"https://openalex.org/keywords/scheduling","display_name":"Scheduling (production processes)","score":0.38359999656677246},{"id":"https://openalex.org/keywords/code","display_name":"Code (set theory)","score":0.34380000829696655}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8025000095367432},{"id":"https://openalex.org/C2777211547","wikidata":"https://www.wikidata.org/wiki/Q17141490","display_name":"Training (meteorology)","level":2,"score":0.6247000098228455},{"id":"https://openalex.org/C129844170","wikidata":"https://www.wikidata.org/wiki/Q41299","display_name":"Quadratic equation","level":2,"score":0.48570001125335693},{"id":"https://openalex.org/C51632099","wikidata":"https://www.wikidata.org/wiki/Q3985153","display_name":"Training set","level":2,"score":0.48019999265670776},{"id":"https://openalex.org/C66322947","wikidata":"https://www.wikidata.org/wiki/Q11658","display_name":"Transformer","level":3,"score":0.47839999198913574},{"id":"https://openalex.org/C2778112365","wikidata":"https://www.wikidata.org/wiki/Q3511065","display_name":"Sequence (biology)","level":2,"score":0.4668999910354614},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.40950000286102295},{"id":"https://openalex.org/C67186912","wikidata":"https://www.wikidata.org/wiki/Q367664","display_name":"Data modeling","level":2,"score":0.3962000012397766},{"id":"https://openalex.org/C206729178","wikidata":"https://www.wikidata.org/wiki/Q2271896","display_name":"Scheduling (production processes)","level":2,"score":0.38359999656677246},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.37560001015663147},{"id":"https://openalex.org/C2776760102","wikidata":"https://www.wikidata.org/wiki/Q5139990","display_name":"Code (set theory)","level":3,"score":0.34380000829696655},{"id":"https://openalex.org/C140779682","wikidata":"https://www.wikidata.org/wiki/Q210868","display_name":"Sampling (signal processing)","level":3,"score":0.34310001134872437},{"id":"https://openalex.org/C81845259","wikidata":"https://www.wikidata.org/wiki/Q290117","display_name":"Quadratic programming","level":2,"score":0.33469998836517334},{"id":"https://openalex.org/C43126263","wikidata":"https://www.wikidata.org/wiki/Q128751","display_name":"Source code","level":2,"score":0.32330000400543213},{"id":"https://openalex.org/C311688","wikidata":"https://www.wikidata.org/wiki/Q2393193","display_name":"Time complexity","level":2,"score":0.3156999945640564},{"id":"https://openalex.org/C2989134064","wikidata":"https://www.wikidata.org/wiki/Q288510","display_name":"Execution time","level":2,"score":0.3125999867916107},{"id":"https://openalex.org/C2985946229","wikidata":"https://www.wikidata.org/wiki/Q49908","display_name":"Data sampling","level":2,"score":0.2962999939918518},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.28940001130104065},{"id":"https://openalex.org/C179799912","wikidata":"https://www.wikidata.org/wiki/Q205084","display_name":"Computational complexity theory","level":2,"score":0.28769999742507935},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.28679999709129333},{"id":"https://openalex.org/C162319229","wikidata":"https://www.wikidata.org/wiki/Q175263","display_name":"Data structure","level":2,"score":0.27630001306533813},{"id":"https://openalex.org/C47487241","wikidata":"https://www.wikidata.org/wiki/Q5227230","display_name":"Data access","level":2,"score":0.27469998598098755},{"id":"https://openalex.org/C3020136221","wikidata":"https://www.wikidata.org/wiki/Q186588","display_name":"Time sequence","level":2,"score":0.2711000144481659},{"id":"https://openalex.org/C120314980","wikidata":"https://www.wikidata.org/wiki/Q180634","display_name":"Distributed computing","level":1,"score":0.2599000036716461},{"id":"https://openalex.org/C138959212","wikidata":"https://www.wikidata.org/wiki/Q1806783","display_name":"Load balancing (electrical power)","level":3,"score":0.25380000472068787}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3769802","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3769802","pdf_url":null,"source":{"id":"https://openalex.org/S4387289859","display_name":"Proceedings of the ACM on Management of Data","issn_l":"2836-6573","issn":["2836-6573"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the ACM on Management of Data","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":30,"referenced_works":["https://openalex.org/W2005908084","https://openalex.org/W2047281923","https://openalex.org/W2156094106","https://openalex.org/W2995558816","https://openalex.org/W3081168214","https://openalex.org/W3122286897","https://openalex.org/W3193985311","https://openalex.org/W3204998121","https://openalex.org/W4310282800","https://openalex.org/W4312060029","https://openalex.org/W4327930469","https://openalex.org/W4386768656","https://openalex.org/W4387321115","https://openalex.org/W4388041378","https://openalex.org/W4391164389","https://openalex.org/W4392207935","https://openalex.org/W4394923298","https://openalex.org/W4395117348","https://openalex.org/W4400869738","https://openalex.org/W4400893425","https://openalex.org/W4401812232","https://openalex.org/W4404340628","https://openalex.org/W4406800520","https://openalex.org/W4407356947","https://openalex.org/W4408893341","https://openalex.org/W4408925916","https://openalex.org/W4411403514","https://openalex.org/W4411700982","https://openalex.org/W4413953195","https://openalex.org/W4415798027"],"related_works":[],"abstract_inverted_index":{"To":[0,75],"optimize":[1],"large":[2,97],"Transformer":[3],"model":[4,98,141],"training,":[5],"both":[6,31,137],"efficient":[7],"parallel":[8,87,103],"computing":[9],"and":[10,23,33,67,89,113,139],"advanced":[11],"data":[12,41,55,90,123],"management":[13],"are":[14],"indispensable.":[15],"However,":[16],"current":[17],"methods":[18],"often":[19],"assume":[20],"a":[21,121,128],"stable":[22],"uniform":[24],"training":[25,38,52,99,115,135],"workload,":[26],"neglecting":[27],"data-induced":[28],"imbalances-arising":[29],"from":[30,45,59],"sampling":[32,42],"packing":[34,56],"processes-which":[35],"can":[36],"impede":[37],"performance.":[39],"Specifically,":[40],"imbalance":[43,57,78],"arises":[44],"uneven":[46],"sequence":[47,109],"length":[48,110],"distribution":[49],"of":[50,71,133],"the":[51,60,63,72,86,108,134],"data,":[53],"while":[54],"stems":[58],"discrepancy":[61],"between":[62],"linear":[64],"memory":[65],"complexity":[66,70],"quadratic":[68],"time":[69],"attention":[73],"mechanism.":[74],"address":[76],"these":[77],"issues,":[79],"we":[80,95,119],"develop":[81],"Hydraulis,":[82],"which":[83,126],"jointly":[84],"optimizes":[85],"strategies":[88,104],"assignment.":[91],"For":[92,117],"one":[93],"thing,":[94],"introduce":[96],"with":[100],"dynamic":[101],"heterogeneous":[102],"in":[105,131],"response":[106],"to":[107],"variations":[111],"within":[112,138],"across":[114,140],"iterations.":[116],"another,":[118],"devise":[120],"two-stage":[122],"assignment":[124],"approach,":[125],"strikes":[127],"good":[129],"balance":[130],"terms":[132],"workloads":[136],"replicas.":[142],"Empirical":[143],"results":[144],"demonstrate":[145],"that":[146],"Hydraulis":[147],"outperforms":[148],"existing":[149],"systems":[150],"by":[151],"1.32-2.66\u00d7.":[152],"Our":[153],"source":[154],"code":[155],"is":[156],"available:":[157],"https://github.com/PKU-DAIR/Hetu.":[158]},"counts_by_year":[],"updated_date":"2025-12-06T23:14:57.273132","created_date":"2025-12-06T00:00:00"}
