{"id":"https://openalex.org/W7155358372","doi":"https://doi.org/10.48550/arxiv.2604.20796","title":"LLaDA2.0-Uni: Unifying Multimodal Understanding and Generation with Diffusion Large Language Model","display_name":"LLaDA2.0-Uni: Unifying Multimodal Understanding and Generation with Diffusion Large Language Model","publication_year":2026,"publication_date":"2026-04-22","ids":{"openalex":"https://openalex.org/W7155358372","doi":"https://doi.org/10.48550/arxiv.2604.20796"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2604.20796","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.20796","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2604.20796","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5120299272","display_name":"Inclusion AI","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"AI, Inclusion","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5134521093","display_name":"Tiwei Bie","orcid":"https://orcid.org/0009-0001-0286-1539"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Bie, Tiwei","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5134374425","display_name":"Haoxing Chen","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Chen, Haoxing","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5134426255","display_name":"Tieyuan Chen","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Chen, Tieyuan","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5134395353","display_name":"Zhenglin Cheng","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Cheng, Zhenglin","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5124535738","display_name":"Long Cui","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Cui, Long","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5061571440","display_name":"Kai Gan","orcid":"https://orcid.org/0000-0001-8414-7242"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Gan, Kai","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5134371264","display_name":"Zhicheng Huang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Huang, Zhicheng","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5134440813","display_name":"Zhenzhong Lan","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Lan, Zhenzhong","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5134375141","display_name":"Haoquan Li","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Li, Haoquan","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5134455138","display_name":"Jianguo Li","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Li, Jianguo","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5134415172","display_name":"Tao Lin","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Lin, Tao","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5134421859","display_name":"Qi Qin","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Qin, Qi","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5134415513","display_name":"Hongjun Wang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wang, Hongjun","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5134399466","display_name":"Xiaomei Wang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wang, Xiaomei","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5134428813","display_name":"Haoyuan Wu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wu, Haoyuan","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5134367881","display_name":"Yi Xin","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Xin, Yi","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5134383633","display_name":"Junbo Zhao","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhao, Junbo","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":18,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.6668000221252441,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.6668000221252441,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10775","display_name":"Generative Adversarial Networks and Image Synthesis","score":0.11670000106096268,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11307","display_name":"Domain Adaptation and Few-Shot Learning","score":0.05339999869465828,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/scalability","display_name":"Scalability","score":0.6284000277519226},{"id":"https://openalex.org/keywords/inference","display_name":"Inference","score":0.531499981880188},{"id":"https://openalex.org/keywords/language-model","display_name":"Language model","score":0.44510000944137573},{"id":"https://openalex.org/keywords/encoding","display_name":"Encoding (memory)","score":0.4433000087738037},{"id":"https://openalex.org/keywords/decoding-methods","display_name":"Decoding methods","score":0.4415000081062317},{"id":"https://openalex.org/keywords/unified-model","display_name":"Unified Model","score":0.3337000012397766},{"id":"https://openalex.org/keywords/diffusion","display_name":"Diffusion","score":0.3314000070095062},{"id":"https://openalex.org/keywords/architecture","display_name":"Architecture","score":0.28279998898506165}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8102999925613403},{"id":"https://openalex.org/C48044578","wikidata":"https://www.wikidata.org/wiki/Q727490","display_name":"Scalability","level":2,"score":0.6284000277519226},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5407000184059143},{"id":"https://openalex.org/C2776214188","wikidata":"https://www.wikidata.org/wiki/Q408386","display_name":"Inference","level":2,"score":0.531499981880188},{"id":"https://openalex.org/C137293760","wikidata":"https://www.wikidata.org/wiki/Q3621696","display_name":"Language model","level":2,"score":0.44510000944137573},{"id":"https://openalex.org/C125411270","wikidata":"https://www.wikidata.org/wiki/Q18653","display_name":"Encoding (memory)","level":2,"score":0.4433000087738037},{"id":"https://openalex.org/C57273362","wikidata":"https://www.wikidata.org/wiki/Q576722","display_name":"Decoding methods","level":2,"score":0.4415000081062317},{"id":"https://openalex.org/C80444323","wikidata":"https://www.wikidata.org/wiki/Q2878974","display_name":"Theoretical computer science","level":1,"score":0.35850000381469727},{"id":"https://openalex.org/C45493050","wikidata":"https://www.wikidata.org/wiki/Q7884934","display_name":"Unified Model","level":2,"score":0.3337000012397766},{"id":"https://openalex.org/C69357855","wikidata":"https://www.wikidata.org/wiki/Q163214","display_name":"Diffusion","level":2,"score":0.3314000070095062},{"id":"https://openalex.org/C123657996","wikidata":"https://www.wikidata.org/wiki/Q12271","display_name":"Architecture","level":2,"score":0.28279998898506165},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.27720001339912415},{"id":"https://openalex.org/C36464697","wikidata":"https://www.wikidata.org/wiki/Q451553","display_name":"Visualization","level":2,"score":0.27459999918937683},{"id":"https://openalex.org/C2780878386","wikidata":"https://www.wikidata.org/wiki/Q1659648","display_name":"Visual language","level":2,"score":0.274399995803833},{"id":"https://openalex.org/C67186912","wikidata":"https://www.wikidata.org/wiki/Q367664","display_name":"Data modeling","level":2,"score":0.273499995470047},{"id":"https://openalex.org/C75553542","wikidata":"https://www.wikidata.org/wiki/Q178161","display_name":"A priori and a posteriori","level":2,"score":0.2671000063419342},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.2671000063419342},{"id":"https://openalex.org/C184337299","wikidata":"https://www.wikidata.org/wiki/Q1437428","display_name":"Semantics (computer science)","level":2,"score":0.26330000162124634},{"id":"https://openalex.org/C73000952","wikidata":"https://www.wikidata.org/wiki/Q17007827","display_name":"Discretization","level":2,"score":0.2630000114440918},{"id":"https://openalex.org/C2779478453","wikidata":"https://www.wikidata.org/wiki/Q6889748","display_name":"Modularity (biology)","level":2,"score":0.25760000944137573},{"id":"https://openalex.org/C22367795","wikidata":"https://www.wikidata.org/wiki/Q7625208","display_name":"Structured prediction","level":2,"score":0.2549999952316284}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2604.20796","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.20796","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2604.20796","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.20796","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[{"display_name":"Quality Education","id":"https://metadata.un.org/sdg/4","score":0.4174465239048004}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"We":[0],"present":[1],"LLaDA2.0-Uni,":[2],"a":[3,18,25,30,35,95,125],"unified":[4,132],"discrete":[5,28],"diffusion":[6,36,50],"large":[7],"language":[8],"model":[9,46],"(dLLM)":[10],"that":[11],"supports":[12],"multimodal":[13,105],"understanding":[14,106],"and":[15,34,54,82,94,114,122,127,136],"generation":[16,113,121],"within":[17,57],"natively":[19],"integrated":[20],"framework.":[21],"Its":[22,116],"architecture":[23],"combines":[24],"fully":[26],"semantic":[27],"tokenizer,":[29],"MoE-based":[31],"dLLM":[32],"backbone,":[33,59],"decoder.":[37,87],"By":[38],"discretizing":[39],"continuous":[40],"visual":[41,64],"inputs":[42,56],"via":[43],"SigLIP-VQ,":[44],"the":[45,58,61,80,86],"enables":[47],"block-level":[48],"masked":[49],"for":[51,119,130],"both":[52],"text":[53],"vision":[55],"while":[60,107],"decoder":[62],"reconstructs":[63],"tokens":[65],"into":[66],"high-fidelity":[67],"images.":[68],"Inference":[69],"efficiency":[70],"is":[71],"enhanced":[72],"beyond":[73],"parallel":[74],"decoding":[75],"through":[76],"prefix-aware":[77],"optimizations":[78],"in":[79,85,104,111],"backbone":[81],"few-step":[83],"distillation":[84],"Supported":[88],"by":[89],"carefully":[90],"curated":[91],"large-scale":[92],"data":[93],"tailored":[96],"multi-stage":[97],"training":[98],"pipeline,":[99],"LLaDA2.0-Uni":[100],"matches":[101],"specialized":[102],"VLMs":[103],"delivering":[108],"strong":[109],"performance":[110],"image":[112],"editing.":[115],"native":[117],"support":[118],"interleaved":[120],"reasoning":[123],"establishes":[124],"promising":[126],"scalable":[128],"paradigm":[129],"next-generation":[131],"foundation":[133],"models.":[134],"Codes":[135],"models":[137],"are":[138],"available":[139],"at":[140],"https://github.com/inclusionAI/LLaDA2.0-Uni.":[141]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2026-04-24T00:00:00"}
