{"id":"https://openalex.org/W7155505707","doi":"https://doi.org/10.1145/3767295.3803587","title":"MegaScale-Omni: A Hyper-Scale, Workload-Resilient System for MultiModal LLM Training in Production","display_name":"MegaScale-Omni: A Hyper-Scale, Workload-Resilient System for MultiModal LLM Training in Production","publication_year":2026,"publication_date":"2026-04-24","ids":{"openalex":"https://openalex.org/W7155505707","doi":"https://doi.org/10.1145/3767295.3803587"},"language":null,"primary_location":{"id":"doi:10.1145/3767295.3803587","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3767295.3803587","pdf_url":null,"source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 21st European Conference on Computer Systems","raw_type":"proceedings-article"},"type":"article","indexed_in":["arxiv","crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://doi.org/10.1145/3767295.3803587","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5109042106","display_name":"C Y Xue","orcid":null},"institutions":[{"id":"https://openalex.org/I183067930","display_name":"Shanghai Jiao Tong University","ror":"https://ror.org/0220qvk04","country_code":"CN","type":"education","lineage":["https://openalex.org/I183067930"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Chunyu Xue","raw_affiliation_strings":["Shanghai Jiao Tong University, Shanghai, China"],"raw_orcid":"https://orcid.org/0009-0008-9272-1732","affiliations":[{"raw_affiliation_string":"Shanghai Jiao Tong University, Shanghai, China","institution_ids":["https://openalex.org/I183067930"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5062504314","display_name":"Yangrui Chen","orcid":null},"institutions":[{"id":"https://openalex.org/I4210128111","display_name":"Clover Seed (China)","ror":"https://ror.org/037jwt041","country_code":"CN","type":"company","lineage":["https://openalex.org/I4210128111"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yangrui Chen","raw_affiliation_strings":["ByteDance Seed, Shanghai, China"],"raw_orcid":"https://orcid.org/0009-0007-6682-1783","affiliations":[{"raw_affiliation_string":"ByteDance Seed, Shanghai, China","institution_ids":["https://openalex.org/I4210128111"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5091837703","display_name":"Jianyu Jiang","orcid":"https://orcid.org/0000-0002-8684-8509"},"institutions":[{"id":"https://openalex.org/I4210128111","display_name":"Clover Seed (China)","ror":"https://ror.org/037jwt041","country_code":"CN","type":"company","lineage":["https://openalex.org/I4210128111"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jianyu Jiang","raw_affiliation_strings":["ByteDance Seed, Shanghai, China"],"raw_orcid":"https://orcid.org/0000-0002-8684-8509","affiliations":[{"raw_affiliation_string":"ByteDance Seed, Shanghai, China","institution_ids":["https://openalex.org/I4210128111"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5018118687","display_name":"Ningxin Zheng","orcid":null},"institutions":[{"id":"https://openalex.org/I4210128111","display_name":"Clover Seed (China)","ror":"https://ror.org/037jwt041","country_code":"CN","type":"company","lineage":["https://openalex.org/I4210128111"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Ningxin Zheng","raw_affiliation_strings":["ByteDance Seed, Shanghai, China"],"raw_orcid":"https://orcid.org/0009-0004-1147-6984","affiliations":[{"raw_affiliation_string":"ByteDance Seed, Shanghai, China","institution_ids":["https://openalex.org/I4210128111"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5017329016","display_name":"Junda Feng","orcid":"https://orcid.org/0000-0003-3664-6615"},"institutions":[{"id":"https://openalex.org/I4210128111","display_name":"Clover Seed (China)","ror":"https://ror.org/037jwt041","country_code":"CN","type":"company","lineage":["https://openalex.org/I4210128111"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Junda Feng","raw_affiliation_strings":["ByteDance Seed, Shanghai, China"],"raw_orcid":"https://orcid.org/0000-0003-3664-6615","affiliations":[{"raw_affiliation_string":"ByteDance Seed, Shanghai, China","institution_ids":["https://openalex.org/I4210128111"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5134497052","display_name":"Jingji Chen","orcid":"https://orcid.org/0000-0003-4135-4684"},"institutions":[{"id":"https://openalex.org/I4210128111","display_name":"Clover Seed (China)","ror":"https://ror.org/037jwt041","country_code":"CN","type":"company","lineage":["https://openalex.org/I4210128111"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jingji Chen","raw_affiliation_strings":["ByteDance Seed, Shanghai, China"],"raw_orcid":"https://orcid.org/0000-0003-4135-4684","affiliations":[{"raw_affiliation_string":"ByteDance Seed, Shanghai, China","institution_ids":["https://openalex.org/I4210128111"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5088629918","display_name":"Shixiong Zhao","orcid":"https://orcid.org/0000-0002-1643-2583"},"institutions":[{"id":"https://openalex.org/I4210128111","display_name":"Clover Seed (China)","ror":"https://ror.org/037jwt041","country_code":"CN","type":"company","lineage":["https://openalex.org/I4210128111"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Shixiong Zhao","raw_affiliation_strings":["ByteDance Seed, Shanghai, China"],"raw_orcid":"https://orcid.org/0000-0002-1643-2583","affiliations":[{"raw_affiliation_string":"ByteDance Seed, Shanghai, China","institution_ids":["https://openalex.org/I4210128111"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5070076188","display_name":"Shen Yan","orcid":"https://orcid.org/0000-0002-3920-1825"},"institutions":[{"id":"https://openalex.org/I4210128111","display_name":"Clover Seed (China)","ror":"https://ror.org/037jwt041","country_code":"CN","type":"company","lineage":["https://openalex.org/I4210128111"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Shen Yan","raw_affiliation_strings":["Bytedance seed, Shanghai, China"],"raw_orcid":"https://orcid.org/0009-0003-3006-8217","affiliations":[{"raw_affiliation_string":"Bytedance seed, Shanghai, China","institution_ids":["https://openalex.org/I4210128111"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5134470983","display_name":"Yi Lin","orcid":"https://orcid.org/0009-0005-6591-5863"},"institutions":[{"id":"https://openalex.org/I4210128111","display_name":"Clover Seed (China)","ror":"https://ror.org/037jwt041","country_code":"CN","type":"company","lineage":["https://openalex.org/I4210128111"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yi Lin","raw_affiliation_strings":["ByteDance Seed, Shanghai, China"],"raw_orcid":"https://orcid.org/0009-0005-6591-5863","affiliations":[{"raw_affiliation_string":"ByteDance Seed, Shanghai, China","institution_ids":["https://openalex.org/I4210128111"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5134534447","display_name":"Lei Shi","orcid":"https://orcid.org/0009-0001-2392-5300"},"institutions":[{"id":"https://openalex.org/I4210128111","display_name":"Clover Seed (China)","ror":"https://ror.org/037jwt041","country_code":"CN","type":"company","lineage":["https://openalex.org/I4210128111"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Lei Shi","raw_affiliation_strings":["Bytedance Seed, Shanghai, China"],"raw_orcid":"https://orcid.org/0009-0001-2392-5300","affiliations":[{"raw_affiliation_string":"Bytedance Seed, Shanghai, China","institution_ids":["https://openalex.org/I4210128111"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5053078653","display_name":"Zanbo Wang","orcid":null},"institutions":[{"id":"https://openalex.org/I4210128111","display_name":"Clover Seed (China)","ror":"https://ror.org/037jwt041","country_code":"CN","type":"company","lineage":["https://openalex.org/I4210128111"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Zanbo Wang","raw_affiliation_strings":["ByteDance Seed, Shanghai, China"],"raw_orcid":"https://orcid.org/0009-0007-0037-7720","affiliations":[{"raw_affiliation_string":"ByteDance Seed, Shanghai, China","institution_ids":["https://openalex.org/I4210128111"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5134514664","display_name":"Lishu Luo","orcid":"https://orcid.org/0009-0009-0313-9423"},"institutions":[{"id":"https://openalex.org/I4210128111","display_name":"Clover Seed (China)","ror":"https://ror.org/037jwt041","country_code":"CN","type":"company","lineage":["https://openalex.org/I4210128111"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Lishu Luo","raw_affiliation_strings":["ByteDance Seed, Shanghai, China"],"raw_orcid":"https://orcid.org/0009-0009-0313-9423","affiliations":[{"raw_affiliation_string":"ByteDance Seed, Shanghai, China","institution_ids":["https://openalex.org/I4210128111"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5087059403","display_name":"Faming Wu","orcid":"https://orcid.org/0000-0003-4256-3277"},"institutions":[{"id":"https://openalex.org/I4210128111","display_name":"Clover Seed (China)","ror":"https://ror.org/037jwt041","country_code":"CN","type":"company","lineage":["https://openalex.org/I4210128111"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Faming Wu","raw_affiliation_strings":["ByteDance Seed, Shanghai, China"],"raw_orcid":"https://orcid.org/0000-0003-4256-3277","affiliations":[{"raw_affiliation_string":"ByteDance Seed, Shanghai, China","institution_ids":["https://openalex.org/I4210128111"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5084047386","display_name":"Haibin Lin","orcid":"https://orcid.org/0000-0003-4879-5335"},"institutions":[{"id":"https://openalex.org/I4210128111","display_name":"Clover Seed (China)","ror":"https://ror.org/037jwt041","country_code":"CN","type":"company","lineage":["https://openalex.org/I4210128111"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Haibin Lin","raw_affiliation_strings":["ByteDance Seed, Shanghai, China"],"raw_orcid":"https://orcid.org/0000-0003-4879-5335","affiliations":[{"raw_affiliation_string":"ByteDance Seed, Shanghai, China","institution_ids":["https://openalex.org/I4210128111"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5013856812","display_name":"Yanghua Peng","orcid":"https://orcid.org/0000-0003-3989-4358"},"institutions":[{"id":"https://openalex.org/I4210128111","display_name":"Clover Seed (China)","ror":"https://ror.org/037jwt041","country_code":"CN","type":"company","lineage":["https://openalex.org/I4210128111"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yanghua Peng","raw_affiliation_strings":["ByteDance Seed, Shanghai, China"],"raw_orcid":"https://orcid.org/0000-0003-3989-4358","affiliations":[{"raw_affiliation_string":"ByteDance Seed, Shanghai, China","institution_ids":["https://openalex.org/I4210128111"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5113343221","display_name":"Xin Liu","orcid":"https://orcid.org/0009-0000-8346-3323"},"institutions":[{"id":"https://openalex.org/I4210128111","display_name":"Clover Seed (China)","ror":"https://ror.org/037jwt041","country_code":"CN","type":"company","lineage":["https://openalex.org/I4210128111"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xin Liu","raw_affiliation_strings":["ByteDance Seed, Shanghai, China"],"raw_orcid":"https://orcid.org/0009-0000-8346-3323","affiliations":[{"raw_affiliation_string":"ByteDance Seed, Shanghai, China","institution_ids":["https://openalex.org/I4210128111"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5100377856","display_name":"Quan Chen","orcid":"https://orcid.org/0000-0003-2034-0371"},"institutions":[{"id":"https://openalex.org/I183067930","display_name":"Shanghai Jiao Tong University","ror":"https://ror.org/0220qvk04","country_code":"CN","type":"education","lineage":["https://openalex.org/I183067930"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Quan Chen","raw_affiliation_strings":["Shanghai Jiao Tong University, Shanghai, China"],"raw_orcid":"https://orcid.org/0000-0001-5832-0347","affiliations":[{"raw_affiliation_string":"Shanghai Jiao Tong University, Shanghai, China","institution_ids":["https://openalex.org/I183067930"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":17,"corresponding_author_ids":["https://openalex.org/A5109042106"],"corresponding_institution_ids":["https://openalex.org/I183067930"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":true,"cited_by_count":0,"citation_normalized_percentile":{"value":0.78226431,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"675","last_page":"692"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10763","display_name":"Digital Transformation in Industry","score":0.037700001150369644,"subfield":{"id":"https://openalex.org/subfields/2209","display_name":"Industrial and Manufacturing Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10763","display_name":"Digital Transformation in Industry","score":0.037700001150369644,"subfield":{"id":"https://openalex.org/subfields/2209","display_name":"Industrial and Manufacturing Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10525","display_name":"Human-Automation Interaction and Safety","score":0.035100001841783524,"subfield":{"id":"https://openalex.org/subfields/3207","display_name":"Social Psychology"},"field":{"id":"https://openalex.org/fields/32","display_name":"Psychology"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T10653","display_name":"Robot Manipulation and Learning","score":0.028300000354647636,"subfield":{"id":"https://openalex.org/subfields/2207","display_name":"Control and Systems Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/training","display_name":"Training (meteorology)","score":0.5709999799728394},{"id":"https://openalex.org/keywords/production","display_name":"Production (economics)","score":0.5526999831199646},{"id":"https://openalex.org/keywords/field","display_name":"Field (mathematics)","score":0.30869999527931213},{"id":"https://openalex.org/keywords/production-system","display_name":"Production system (computer science)","score":0.2994999885559082},{"id":"https://openalex.org/keywords/work","display_name":"Work (physics)","score":0.2840999960899353},{"id":"https://openalex.org/keywords/action","display_name":"Action (physics)","score":0.28369998931884766}],"concepts":[{"id":"https://openalex.org/C2777211547","wikidata":"https://www.wikidata.org/wiki/Q17141490","display_name":"Training (meteorology)","level":2,"score":0.5709999799728394},{"id":"https://openalex.org/C2778348673","wikidata":"https://www.wikidata.org/wiki/Q739302","display_name":"Production (economics)","level":2,"score":0.5526999831199646},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.5234000086784363},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.32120001316070557},{"id":"https://openalex.org/C9652623","wikidata":"https://www.wikidata.org/wiki/Q190109","display_name":"Field (mathematics)","level":2,"score":0.30869999527931213},{"id":"https://openalex.org/C2776842983","wikidata":"https://www.wikidata.org/wiki/Q581319","display_name":"Production system (computer science)","level":3,"score":0.2994999885559082},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.29679998755455017},{"id":"https://openalex.org/C18762648","wikidata":"https://www.wikidata.org/wiki/Q42213","display_name":"Work (physics)","level":2,"score":0.2840999960899353},{"id":"https://openalex.org/C2780791683","wikidata":"https://www.wikidata.org/wiki/Q846785","display_name":"Action (physics)","level":2,"score":0.28369998931884766},{"id":"https://openalex.org/C107457646","wikidata":"https://www.wikidata.org/wiki/Q207434","display_name":"Human\u2013computer interaction","level":1,"score":0.27379998564720154},{"id":"https://openalex.org/C2776857766","wikidata":"https://www.wikidata.org/wiki/Q7832987","display_name":"Training system","level":2,"score":0.27079999446868896},{"id":"https://openalex.org/C115901376","wikidata":"https://www.wikidata.org/wiki/Q184199","display_name":"Automation","level":2,"score":0.25519999861717224},{"id":"https://openalex.org/C2779530757","wikidata":"https://www.wikidata.org/wiki/Q1207505","display_name":"Quality (philosophy)","level":2,"score":0.2533999979496002}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1145/3767295.3803587","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3767295.3803587","pdf_url":null,"source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 21st European Conference on Computer Systems","raw_type":"proceedings-article"},{"id":"pmh:oai:arXiv.org:2605.08962","is_oa":true,"landing_page_url":"https://arxiv.org/abs/2605.08962","pdf_url":"https://arxiv.org/pdf/2605.08962","source":{"id":"https://openalex.org/S4393918464","display_name":"ArXiv.org","issn_l":"2331-8422","issn":["2331-8422"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"}],"best_oa_location":{"id":"doi:10.1145/3767295.3803587","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3767295.3803587","pdf_url":null,"source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 21st European Conference on Computer Systems","raw_type":"proceedings-article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":5,"referenced_works":["https://openalex.org/W2116667771","https://openalex.org/W4288083516","https://openalex.org/W4386768656","https://openalex.org/W4412886767","https://openalex.org/W4413757087"],"related_works":[],"abstract_inverted_index":{"As":[0],"the":[1,51,75,104,156],"foundational":[2],"component":[3],"of":[4,42,78,126,158,167],"versatile":[5],"AI":[6],"applications,":[7],"training":[8,61,76,163],"an":[9,58],"multimodal":[10,17],"large":[11],"language":[12],"model":[13,46],"(MLLM)":[14],"relies":[15],"on":[16],"datasets":[18],"with":[19,81,89,130,165],"dynamic":[20,35,65,178],"modality":[21],"mixture":[22],"proportions":[23],"and":[24,45,50,68,99,122,144],"sample":[25],"length":[26],"distributions.":[27],"However,":[28],"existing":[29],"MLLM":[30,60,162],"systems":[31],"remain":[32],"inefficient":[33],"under":[34,109,176],"workloads,":[36,179],"due":[37],"to":[38,95,149,182],"statically":[39],"coupled":[40],"decisions":[41],"resource":[43],"allocation":[44],"parallelization":[47,112],"between":[48],"encoders":[49,94],"LLM":[52,105,150],"backbone.":[53],"This":[54],"paper":[55],"presents":[56],"MegaScale-Omni,":[57],"industrial-grade":[59],"system":[62],"tailored":[63],"for":[64,93,103,118],"workload":[66,131],"adaption":[67],"hyper-scale":[69],"deployment.":[70],"MegaScale-Omni":[71,152],"is":[72,153],"built":[73],"upon":[74],"scheme":[77],"encoder-LLM":[79,116,127],"multiplexing":[80],"three":[82],"key":[83],"innovations:":[84],"(1)":[85],"Decoupled":[86],"parallelism":[87,92,102],"strategies":[88],"long-short":[90],"sequence":[91],"process":[96],"variable-length":[97],"samples,":[98],"full-fledged":[100],"5D":[101],"backbone,":[106],"both":[107],"organized":[108],"a":[110,123],"communication-efficient":[111],"layout.":[113],"(2)":[114],"Unified":[115],"representations":[117],"flexible,":[119],"extensible":[120],"colocation,":[121],"new":[124],"paradigm":[125],"joint":[128],"pipeline":[129],"resilience.":[132],"(3)":[133],"Workload":[134],"balancing":[135],"techniques":[136],"via":[137],"decentralized":[138],"grouped":[139],"reordering":[140],"in":[141],"data":[142],"loaders":[143],"adaptive":[145],"resharding":[146],"from":[147],"encoder":[148],"ranks.":[151],"deployed":[154],"as":[155,180],"foundation":[157],"our":[159],"in-house":[160],"large-scale":[161],"tasks":[164],"thousands":[166],"GPUs.":[168],"Our":[169],"experimental":[170],"results":[171],"demonstrate":[172],"1.27\u00d7\u20137.57\u00d7":[173],"throughput":[174],"improvement":[175],"production-grade":[177],"compared":[181],"four":[183],"state-of-the-art":[184],"systems.":[185]},"counts_by_year":[],"updated_date":"2026-05-15T08:27:34.491423","created_date":"2026-04-25T00:00:00"}
