{"id":"https://openalex.org/W7141184231","doi":"https://doi.org/10.48550/arxiv.2603.25706","title":"Wan-Weaver: Interleaved Multi-modal Generation via Decoupled Training","display_name":"Wan-Weaver: Interleaved Multi-modal Generation via Decoupled Training","publication_year":2026,"publication_date":"2026-03-26","ids":{"openalex":"https://openalex.org/W7141184231","doi":"https://doi.org/10.48550/arxiv.2603.25706"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2603.25706","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.25706","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2603.25706","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5066086464","display_name":"Jinbo Xing","orcid":"https://orcid.org/0000-0002-2181-1879"},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Xing, Jinbo","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5036869159","display_name":"Zeyinzi Jiang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Jiang, Zeyinzi","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5130737687","display_name":"Yuxiang Tuo","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Tuo, Yuxiang","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5130788992","display_name":"Chaojie Mao","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Mao, Chaojie","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5130724067","display_name":"Xiaotang Gai","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Gai, Xiaotang","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5130759527","display_name":"Xi Chen","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Chen, Xi","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5130795506","display_name":"Jingfeng Zhang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhang, Jingfeng","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5130768017","display_name":"Yulin Pan","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Pan, Yulin","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5130752507","display_name":"Zhen Han","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Han, Zhen","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5130742643","display_name":"Jie Xiao","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Xiao, Jie","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5130785292","display_name":"Keyu Yan","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yan, Keyu","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5129828141","display_name":"Chenwei Xie","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Xie, Chenwei","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5000186639","display_name":"Chongyang Zhong","orcid":"https://orcid.org/0000-0003-0020-1892"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhong, Chongyang","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5130744825","display_name":"Kai Zhu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhu, Kai","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5130749095","display_name":"Tong Shen","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Shen, Tong","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5019994258","display_name":"Lianghua Huang","orcid":"https://orcid.org/0000-0002-9686-9354"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Huang, Lianghua","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5130799789","display_name":"Yu Liu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Liu, Yu","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5130752324","display_name":"Yujiu Yang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yang, Yujiu","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":18,"corresponding_author_ids":["https://openalex.org/A5066086464"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9347000122070312,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9347000122070312,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10775","display_name":"Generative Adversarial Networks and Image Synthesis","score":0.031300000846385956,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10627","display_name":"Advanced Image and Video Retrieval Techniques","score":0.003100000089034438,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/planner","display_name":"Planner","score":0.7157999873161316},{"id":"https://openalex.org/keywords/construct","display_name":"Construct (python library)","score":0.6940000057220459},{"id":"https://openalex.org/keywords/consistency","display_name":"Consistency (knowledge bases)","score":0.5705999732017517},{"id":"https://openalex.org/keywords/benchmark","display_name":"Benchmark (surveying)","score":0.5281000137329102},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.4632999897003174},{"id":"https://openalex.org/keywords/coherence","display_name":"Coherence (philosophical gambling strategy)","score":0.4025999903678894}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7477999925613403},{"id":"https://openalex.org/C2776999362","wikidata":"https://www.wikidata.org/wiki/Q2349274","display_name":"Planner","level":2,"score":0.7157999873161316},{"id":"https://openalex.org/C2780801425","wikidata":"https://www.wikidata.org/wiki/Q5164392","display_name":"Construct (python library)","level":2,"score":0.6940000057220459},{"id":"https://openalex.org/C2776436953","wikidata":"https://www.wikidata.org/wiki/Q5163215","display_name":"Consistency (knowledge bases)","level":2,"score":0.5705999732017517},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5382999777793884},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.5281000137329102},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.4632999897003174},{"id":"https://openalex.org/C2781181686","wikidata":"https://www.wikidata.org/wiki/Q4226068","display_name":"Coherence (philosophical gambling strategy)","level":2,"score":0.4025999903678894},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.37770000100135803},{"id":"https://openalex.org/C204323151","wikidata":"https://www.wikidata.org/wiki/Q905424","display_name":"Range (aeronautics)","level":2,"score":0.36880001425743103},{"id":"https://openalex.org/C51632099","wikidata":"https://www.wikidata.org/wiki/Q3985153","display_name":"Training set","level":2,"score":0.34619998931884766},{"id":"https://openalex.org/C36464697","wikidata":"https://www.wikidata.org/wiki/Q451553","display_name":"Visualization","level":2,"score":0.32899999618530273},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.3276999890804291},{"id":"https://openalex.org/C206345919","wikidata":"https://www.wikidata.org/wiki/Q20380951","display_name":"Resource (disambiguation)","level":2,"score":0.2955999970436096},{"id":"https://openalex.org/C67186912","wikidata":"https://www.wikidata.org/wiki/Q367664","display_name":"Data modeling","level":2,"score":0.2851000130176544},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.27230000495910645}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2603.25706","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.25706","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2603.25706","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.25706","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[{"display_name":"Sustainable cities and communities","score":0.45913833379745483,"id":"https://metadata.un.org/sdg/11"}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Recent":[0],"unified":[1],"models":[2],"have":[3],"made":[4],"unprecedented":[5],"progress":[6],"in":[7,102,163],"both":[8],"understanding":[9,141],"and":[10,39,58,62,70,108,133,142,155],"generation.":[11],"However,":[12],"while":[13,82],"most":[14],"of":[15,28,42,67,139,176],"them":[16],"accept":[17],"multi-modal":[18],"inputs,":[19],"they":[20],"typically":[21],"produce":[22],"only":[23],"single-modality":[24],"outputs.":[25],"This":[26],"challenge":[27],"producing":[29],"interleaved":[30,53,95,126,164,192],"content":[31,99],"is":[32,100],"mainly":[33],"due":[34],"to":[35,104,113,121,150,189],"training":[36,147],"data":[37,96,112,144],"scarcity":[38],"the":[40,83,106,115,137,160],"difficulty":[41],"modeling":[43],"long-range":[44,130],"cross-modal":[45],"context.":[46],"To":[47,158],"address":[48],"this":[49,89],"issue,":[50],"we":[51,91,166],"decompose":[52],"generation":[54,127,143,156],"into":[55,145],"textual":[56,77,131],"planning":[57],"visual":[59,80,98,134],"consistency":[60],"modeling,":[61],"introduce":[63],"a":[64,68,71,169,173],"framework":[65],"consisting":[66],"planner":[69,74,146],"visualizer.":[72,116],"The":[73],"produces":[75],"dense":[76],"descriptions":[78],"for":[79],"content,":[81],"visualizer":[84],"synthesizes":[85],"images":[86],"accordingly.":[87],"Under":[88],"guidance,":[90],"construct":[92,168],"large-scale":[93],"textual-proxy":[94],"(where":[97],"represented":[101],"text)":[103],"train":[105,114],"planner,":[107],"curate":[109],"reference-guided":[110],"image":[111],"These":[117],"designs":[118],"give":[119],"rise":[120],"Wan-Weaver,":[122],"which":[123],"exhibits":[124],"emergent":[125],"ability":[128],"with":[129],"coherence":[132],"consistency.":[135],"Meanwhile,":[136],"integration":[138],"diverse":[140],"enables":[148],"Wan-Weaver":[149,194],"achieve":[151],"robust":[152],"task":[153],"reasoning":[154],"proficiency.":[157],"assess":[159],"model's":[161],"capability":[162],"generation,":[165],"further":[167],"benchmark":[170],"that":[171],"spans":[172],"wide":[174],"range":[175],"use":[177],"cases":[178],"across":[179],"multiple":[180],"dimensions.":[181],"Extensive":[182],"experiments":[183],"demonstrate":[184],"that,":[185],"even":[186],"without":[187],"access":[188],"any":[190],"real":[191],"data,":[193],"achieves":[195],"superior":[196],"performance":[197],"over":[198],"existing":[199],"methods.":[200]},"counts_by_year":[],"updated_date":"2026-03-28T06:16:51.555046","created_date":"2026-03-28T00:00:00"}
