{"id":"https://openalex.org/W7161744511","doi":"https://doi.org/10.48550/arxiv.2605.17923","title":"AdaptiveLoad: Towards Efficient Video Diffusion Transformer Training","display_name":"AdaptiveLoad: Towards Efficient Video Diffusion Transformer Training","publication_year":2026,"publication_date":"2026-05-18","ids":{"openalex":"https://openalex.org/W7161744511","doi":"https://doi.org/10.48550/arxiv.2605.17923"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2605.17923","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2605.17923","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2605.17923","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5136467303","display_name":"Yucheng Guo","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Guo, Yucheng","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5136473703","display_name":"Yongjian Guo","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Guo, Yongjian","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5125195887","display_name":"Zhong Guan","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Guan, Zhong","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5136457469","display_name":"Haoran Sun","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Sun, Haoran","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5136483694","display_name":"Wen Huang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Huang, Wen","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5136476076","display_name":"Wanting Xu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Xu, Wanting","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5136493839","display_name":"Jing Long","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Long, Jing","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5041877213","display_name":"Shuai Di","orcid":"https://orcid.org/0000-0001-7466-9709"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Di, Shuai","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5102937576","display_name":"Junwu Xiong","orcid":"https://orcid.org/0009-0008-2028-510X"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Xiong, Junwu","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":9,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10775","display_name":"Generative Adversarial Networks and Image Synthesis","score":0.4731999933719635,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10775","display_name":"Generative Adversarial Networks and Image Synthesis","score":0.4731999933719635,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.11789999902248383,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11612","display_name":"Stochastic Gradient Optimization Techniques","score":0.0681999996304512,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computational-complexity-theory","display_name":"Computational complexity theory","score":0.5649999976158142},{"id":"https://openalex.org/keywords/limiting","display_name":"Limiting","score":0.4648999869823456},{"id":"https://openalex.org/keywords/security-token","display_name":"Security token","score":0.44830000400543213},{"id":"https://openalex.org/keywords/quadratic-equation","display_name":"Quadratic equation","score":0.41029998660087585},{"id":"https://openalex.org/keywords/quadratic-programming","display_name":"Quadratic programming","score":0.38999998569488525},{"id":"https://openalex.org/keywords/transformer","display_name":"Transformer","score":0.38589999079704285},{"id":"https://openalex.org/keywords/training","display_name":"Training (meteorology)","score":0.3601999878883362},{"id":"https://openalex.org/keywords/peak-load","display_name":"Peak load","score":0.3472000062465668}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7245000004768372},{"id":"https://openalex.org/C179799912","wikidata":"https://www.wikidata.org/wiki/Q205084","display_name":"Computational complexity theory","level":2,"score":0.5649999976158142},{"id":"https://openalex.org/C188198153","wikidata":"https://www.wikidata.org/wiki/Q1613840","display_name":"Limiting","level":2,"score":0.4648999869823456},{"id":"https://openalex.org/C48145219","wikidata":"https://www.wikidata.org/wiki/Q1335365","display_name":"Security token","level":2,"score":0.44830000400543213},{"id":"https://openalex.org/C79403827","wikidata":"https://www.wikidata.org/wiki/Q3988","display_name":"Real-time computing","level":1,"score":0.4374000132083893},{"id":"https://openalex.org/C129844170","wikidata":"https://www.wikidata.org/wiki/Q41299","display_name":"Quadratic equation","level":2,"score":0.41029998660087585},{"id":"https://openalex.org/C81845259","wikidata":"https://www.wikidata.org/wiki/Q290117","display_name":"Quadratic programming","level":2,"score":0.38999998569488525},{"id":"https://openalex.org/C66322947","wikidata":"https://www.wikidata.org/wiki/Q11658","display_name":"Transformer","level":3,"score":0.38589999079704285},{"id":"https://openalex.org/C2777211547","wikidata":"https://www.wikidata.org/wiki/Q17141490","display_name":"Training (meteorology)","level":2,"score":0.3601999878883362},{"id":"https://openalex.org/C2986689482","wikidata":"https://www.wikidata.org/wiki/Q1806775","display_name":"Peak load","level":2,"score":0.3472000062465668},{"id":"https://openalex.org/C127964446","wikidata":"https://www.wikidata.org/wiki/Q1092142","display_name":"Computational resource","level":3,"score":0.3425999879837036},{"id":"https://openalex.org/C157764524","wikidata":"https://www.wikidata.org/wiki/Q1383412","display_name":"Throughput","level":3,"score":0.3416999876499176},{"id":"https://openalex.org/C111335779","wikidata":"https://www.wikidata.org/wiki/Q3454686","display_name":"Reduction (mathematics)","level":2,"score":0.33799999952316284},{"id":"https://openalex.org/C177774035","wikidata":"https://www.wikidata.org/wiki/Q1246948","display_name":"Granularity","level":2,"score":0.33709999918937683},{"id":"https://openalex.org/C3017489831","wikidata":"https://www.wikidata.org/wiki/Q2393193","display_name":"Running time","level":2,"score":0.3034000098705292},{"id":"https://openalex.org/C113775141","wikidata":"https://www.wikidata.org/wiki/Q428691","display_name":"Computer engineering","level":1,"score":0.2980000078678131},{"id":"https://openalex.org/C198927703","wikidata":"https://www.wikidata.org/wiki/Q4373881","display_name":"Sequential quadratic programming","level":3,"score":0.2847999930381775},{"id":"https://openalex.org/C2778119891","wikidata":"https://www.wikidata.org/wiki/Q477690","display_name":"CUDA","level":2,"score":0.27489998936653137},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.27410000562667847},{"id":"https://openalex.org/C44154836","wikidata":"https://www.wikidata.org/wiki/Q45045","display_name":"Simulation","level":1,"score":0.2687999904155731},{"id":"https://openalex.org/C311688","wikidata":"https://www.wikidata.org/wiki/Q2393193","display_name":"Time complexity","level":2,"score":0.26510000228881836},{"id":"https://openalex.org/C2781357197","wikidata":"https://www.wikidata.org/wiki/Q5757597","display_name":"High memory","level":2,"score":0.25929999351501465}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2605.17923","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2605.17923","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2605.17923","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2605.17923","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"In":[0],"video":[1,9],"generation":[2],"models,":[3,6],"particularly":[4],"world":[5,129],"training":[7,155],"large-scale":[8],"diffusion":[10],"Transformers":[11],"(such":[12],"as":[13],"DiT":[14],"and":[15,61,95,119,151],"MMDiT)":[16],"poses":[17],"significant":[18],"computational":[19,96,137],"challenges":[20],"due":[21],"to":[22,47,57,116,142],"the":[23,50,126,136],"extreme":[24],"variance":[25],"in":[26],"sequence":[27],"lengths":[28],"within":[29],"mixed-mode":[30],"datasets.":[31],"Existing":[32],"bucket-based":[33],"data":[34],"loading":[35],"strategies":[36],"typically":[37],"rely":[38],"on":[39,125],"\"equal":[40],"token":[41],"length\"":[42],"constraints.":[43],"This":[44,66],"approach":[45],"fails":[46],"account":[48],"for":[49],"quadratic":[51],"complexity":[52],"of":[53,63,75,158],"self-attention":[54],"mechanisms,":[55],"leading":[56],"severe":[58],"load":[59,83,97],"imbalance":[60,138],"underutilization":[62],"GPU":[64],"resources.":[65],"paper":[67],"proposes":[68],"\\textit{AdaptiveLoad},":[69],"an":[70,153],"integrated":[71],"optimization":[72],"framework":[73],"consisting":[74],"two":[76],"core":[77],"components:":[78],"(1)":[79],"A":[80,104],"dual-constraint":[81],"adaptive":[82],"balancing":[84],"system,":[85],"which":[86,109],"eliminates":[87],"long-sequence":[88],"bottlenecks":[89],"by":[90,149],"simultaneously":[91],"limiting":[92],"memory":[93,121],"consumption":[94],"($B":[98],"\\times":[99],"S^p":[100],"\\le":[101],"M_{\\text{comp}}$);":[102],"(2)":[103],"fused":[105],"LayerNorm-Modulate":[106],"CUDA":[107],"kernel,":[108],"utilizes":[110],"a":[111],"D-tile":[112],"coalesced":[113],"reduction":[114],"strategy":[115],"increase":[117,157],"throughput":[118,156],"alleviate":[120],"pressure.":[122],"Experimental":[123],"results":[124],"Wan":[127],"2.1":[128],"model":[130],"demonstrate":[131],"that":[132],"our":[133],"method":[134],"reduces":[135],"rate":[139],"from":[140],"39\\%":[141],"18.9\\%,":[143],"improves":[144],"peak":[145],"VRAM":[146],"utilization":[147],"efficiency":[148],"22.7\\%,":[150],"achieves":[152],"overall":[154],"27.2\\%.":[159]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2026-05-20T00:00:00"}
