{"id":"https://openalex.org/W4398191823","doi":"https://doi.org/10.48550/arxiv.2405.11273","title":"Uni-MoE: Scaling Unified Multimodal LLMs with Mixture of Experts","display_name":"Uni-MoE: Scaling Unified Multimodal LLMs with Mixture of Experts","publication_year":2024,"publication_date":"2024-05-18","ids":{"openalex":"https://openalex.org/W4398191823","doi":"https://doi.org/10.48550/arxiv.2405.11273"},"language":"en","primary_location":{"id":"pmh:oai:arXiv.org:2405.11273","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2405.11273","pdf_url":"https://arxiv.org/pdf/2405.11273","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by-nc-nd","license_id":"https://openalex.org/licenses/cc-by-nc-nd","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"text"},"type":"preprint","indexed_in":["arxiv","datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/2405.11273","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5019703861","display_name":"Yunxin Li","orcid":"https://orcid.org/0000-0003-4819-2489"},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Li, Yunxin","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100593772","display_name":"Shenyuan Jiang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Jiang, Shenyuan","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5083079672","display_name":"Baotian Hu","orcid":"https://orcid.org/0000-0001-7490-684X"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Hu, Baotian","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5088191810","display_name":"Longyue Wang","orcid":"https://orcid.org/0000-0002-9062-6183"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wang, Longyue","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101215538","display_name":"Wanqi Zhong","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhong, Wanqi","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5004450394","display_name":"Wenhan Luo","orcid":"https://orcid.org/0000-0002-5697-4168"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Luo, Wenhan","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5111023136","display_name":"Lin Ma","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Ma, Lin","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5108147788","display_name":"Min Zhang","orcid":"https://orcid.org/0000-0003-2864-2307"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhang, Min","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":8,"corresponding_author_ids":["https://openalex.org/A5019703861"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":true,"cited_by_count":3,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9965999722480774,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9965999722480774,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9829000234603882,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12031","display_name":"Speech and dialogue systems","score":0.9765999913215637,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/scaling","display_name":"Scaling","score":0.6355727910995483},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.34337079524993896},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.18759268522262573}],"concepts":[{"id":"https://openalex.org/C99844830","wikidata":"https://www.wikidata.org/wiki/Q102441924","display_name":"Scaling","level":2,"score":0.6355727910995483},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.34337079524993896},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.18759268522262573},{"id":"https://openalex.org/C2524010","wikidata":"https://www.wikidata.org/wiki/Q8087","display_name":"Geometry","level":1,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"pmh:oai:arXiv.org:2405.11273","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2405.11273","pdf_url":"https://arxiv.org/pdf/2405.11273","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by-nc-nd","license_id":"https://openalex.org/licenses/cc-by-nc-nd","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"text"},{"id":"doi:10.48550/arxiv.2405.11273","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2405.11273","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:2405.11273","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2405.11273","pdf_url":"https://arxiv.org/pdf/2405.11273","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by-nc-nd","license_id":"https://openalex.org/licenses/cc-by-nc-nd","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"text"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":true,"grobid_xml":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4398191823.pdf","grobid_xml":"https://content.openalex.org/works/W4398191823.grobid-xml"},"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W4391375266","https://openalex.org/W2748952813","https://openalex.org/W2390279801","https://openalex.org/W2358668433","https://openalex.org/W4396701345","https://openalex.org/W2376932109","https://openalex.org/W2001405890","https://openalex.org/W4396696052","https://openalex.org/W2382290278","https://openalex.org/W4395014643"],"abstract_inverted_index":{"Recent":[0],"advancements":[1],"in":[2,192,212],"Multimodal":[3],"Large":[4],"Language":[5],"Models":[6],"(MLLMs)":[7],"underscore":[8],"the":[9,27,59,68,101,119,155,169,206,216],"significance":[10],"of":[11,29,79,176,187,209],"scalable":[12],"models":[13],"and":[14,41,50,107,113,122,152,201,215],"data":[15,111,147],"to":[16,36,62,103,148],"boost":[17],"performance,":[18],"yet":[19],"this":[20],"often":[21],"incurs":[22],"substantial":[23,207],"computational":[24],"costs.":[25],"Although":[26],"Mixture":[28],"Experts":[30],"(MoE)":[31],"architecture":[32,99],"has":[33],"been":[34],"employed":[35],"efficiently":[37],"scale":[38],"large":[39],"language":[40],"image-text":[42],"models,":[43],"these":[44],"efforts":[45],"typically":[46],"involve":[47],"fewer":[48],"experts":[49,143],"limited":[51],"modalities.":[52,80],"To":[53,117],"address":[54],"this,":[55],"our":[56],"work":[57],"presents":[58],"pioneering":[60],"attempt":[61],"develop":[63],"a":[64,76,89,96,126,173],"unified":[65,90],"MLLM":[66],"with":[67,86,136,144],"MoE":[69,98,210],"architecture,":[70],"named":[71],"Uni-MoE":[72,156,171],"that":[73],"can":[74],"handle":[75],"wide":[77],"array":[78],"Specifically,":[81],"it":[82],"features":[83],"modality-specific":[84,142],"encoders":[85],"connectors":[87,135],"for":[88],"multimodal":[91,164,177,195],"representation.":[92],"We":[93,167],"also":[94],"implement":[95],"sparse":[97],"within":[100],"LLMs":[102],"enable":[104],"efficient":[105],"training":[106,128],"inference":[108],"through":[109],"modality-level":[110],"parallelism":[112],"expert-level":[114],"model":[115],"parallelism.":[116],"enhance":[118],"multi-expert":[120,199],"collaboration":[121,200],"generalization,":[123],"we":[124],"present":[125],"progressive":[127],"strategy:":[129],"1)":[130],"Cross-modality":[131],"alignment":[132],"using":[133],"various":[134],"different":[137],"cross-modality":[138,145],"data,":[139],"2)":[140],"Training":[141],"instruction":[146,165],"activate":[149],"experts'":[150],"preferences,":[151],"3)":[153],"Tuning":[154],"framework":[157],"utilizing":[158],"Low-Rank":[159],"Adaptation":[160],"(LoRA)":[161],"on":[162,172],"mixed":[163,194],"data.":[166],"evaluate":[168],"instruction-tuned":[170],"comprehensive":[174],"set":[175],"datasets.":[178],"The":[179],"extensive":[180],"experimental":[181],"results":[182],"demonstrate":[183],"Uni-MoE's":[184],"principal":[185],"advantage":[186],"significantly":[188],"reducing":[189],"performance":[190],"bias":[191],"handling":[193],"datasets,":[196],"alongside":[197],"improved":[198],"generalization.":[202],"Our":[203],"findings":[204],"highlight":[205],"potential":[208],"frameworks":[211],"advancing":[213],"MLLMs":[214],"code":[217],"is":[218],"available":[219],"at":[220],"https://github.com/HITsz-TMG/UMOE-Scaling-Unified-Multimodal-LLMs.":[221]},"counts_by_year":[{"year":2024,"cited_by_count":3}],"updated_date":"2026-04-03T22:45:19.894376","created_date":"2025-10-10T00:00:00"}
