{"id":"https://openalex.org/W7161113393","doi":"https://doi.org/10.48550/arxiv.2605.13565","title":"Qwen-Image-VAE-2.0 Technical Report","display_name":"Qwen-Image-VAE-2.0 Technical Report","publication_year":2026,"publication_date":"2026-05-13","ids":{"openalex":"https://openalex.org/W7161113393","doi":"https://doi.org/10.48550/arxiv.2605.13565"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2605.13565","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2605.13565","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2605.13565","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5136126661","display_name":"Zekai Zhang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhang, Zekai","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5135951014","display_name":"Deqing Li","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Li, Deqing","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5135922623","display_name":"Kuan Cao","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Cao, Kuan","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5136152072","display_name":"Yujia Wu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wu, Yujia","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5136142747","display_name":"Chenfei Wu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wu, Chenfei","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5136116014","display_name":"Yu Wu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wu, Yu","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5136107856","display_name":"Liang Peng","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Peng, Liang","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5136180661","display_name":"Hao Meng","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Meng, Hao","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5136185978","display_name":"Jiahao Li","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Li, Jiahao","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5136167458","display_name":"Jie M. Zhang","orcid":"https://orcid.org/0000-0003-3800-697X"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhang, Jie","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5136089843","display_name":"Kaiyuan Gao","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Gao, Kaiyuan","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5136157682","display_name":"Kun Yan","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yan, Kun","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5109646328","display_name":"Lihan Jiang","orcid":"https://orcid.org/0009-0001-2899-273X"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Jiang, Lihan","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5135909706","display_name":"Ningyuan Tang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Tang, Ningyuan","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5136104442","display_name":"Shengming Yin","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yin, Shengming","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5063972231","display_name":"Tianhe Wu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wu, Tianhe","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5136107312","display_name":"Xiao Xu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Xu, Xiao","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5136183685","display_name":"Xiaoyue Chen","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Chen, Xiaoyue","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5136152506","display_name":"Yan Shu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Shu, Yan","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5071604321","display_name":"Yanran Zhang","orcid":"https://orcid.org/0000-0001-7001-8079"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhang, Yanran","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5136094544","display_name":"Yilei Chen","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Chen, Yilei","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5122065243","display_name":"Yixian Xu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Xu, Yixian","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5136117953","display_name":"Yuxiang Chen","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Chen, Yuxiang","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5136137363","display_name":"Zhendong Wang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wang, Zhendong","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5136183812","display_name":"Zihao Liu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Liu, Zihao","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5136114378","display_name":"Zikai Zhou","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhou, Zikai","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5136138933","display_name":"Yiliang Gu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Gu, Yiliang","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5136133259","display_name":"Yi Wang","orcid":"https://orcid.org/0000-0003-1039-3330"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wang, Yi","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5136095453","display_name":"Xiaoxiao Xu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Xu, Xiaoxiao","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5136162205","display_name":"Lin Qu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Qu, Lin","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":30,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10775","display_name":"Generative Adversarial Networks and Image Synthesis","score":0.8709999918937683,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10775","display_name":"Generative Adversarial Networks and Image Synthesis","score":0.8709999918937683,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10901","display_name":"Advanced Data Compression Techniques","score":0.025200000032782555,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11105","display_name":"Advanced Image Processing Techniques","score":0.01549999974668026,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/suite","display_name":"Suite","score":0.6703000068664551},{"id":"https://openalex.org/keywords/leverage","display_name":"Leverage (statistics)","score":0.6682000160217285},{"id":"https://openalex.org/keywords/fidelity","display_name":"Fidelity","score":0.5843999981880188},{"id":"https://openalex.org/keywords/rendering","display_name":"Rendering (computer graphics)","score":0.566100001335144},{"id":"https://openalex.org/keywords/convergence","display_name":"Convergence (economics)","score":0.3856000006198883},{"id":"https://openalex.org/keywords/benchmark","display_name":"Benchmark (surveying)","score":0.3815000057220459}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7156000137329102},{"id":"https://openalex.org/C79581498","wikidata":"https://www.wikidata.org/wiki/Q1367530","display_name":"Suite","level":2,"score":0.6703000068664551},{"id":"https://openalex.org/C153083717","wikidata":"https://www.wikidata.org/wiki/Q6535263","display_name":"Leverage (statistics)","level":2,"score":0.6682000160217285},{"id":"https://openalex.org/C2776459999","wikidata":"https://www.wikidata.org/wiki/Q2119376","display_name":"Fidelity","level":2,"score":0.5843999981880188},{"id":"https://openalex.org/C205711294","wikidata":"https://www.wikidata.org/wiki/Q176953","display_name":"Rendering (computer graphics)","level":2,"score":0.566100001335144},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4821999967098236},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.43459999561309814},{"id":"https://openalex.org/C2777303404","wikidata":"https://www.wikidata.org/wiki/Q759757","display_name":"Convergence (economics)","level":2,"score":0.3856000006198883},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.3815000057220459},{"id":"https://openalex.org/C123657996","wikidata":"https://www.wikidata.org/wiki/Q12271","display_name":"Architecture","level":2,"score":0.3433000147342682},{"id":"https://openalex.org/C113775141","wikidata":"https://www.wikidata.org/wiki/Q428691","display_name":"Computer engineering","level":1,"score":0.3310999870300293},{"id":"https://openalex.org/C2778755073","wikidata":"https://www.wikidata.org/wiki/Q10858537","display_name":"Scale (ratio)","level":2,"score":0.3131999969482422},{"id":"https://openalex.org/C171268870","wikidata":"https://www.wikidata.org/wiki/Q1486676","display_name":"GRASP","level":2,"score":0.3084999918937683},{"id":"https://openalex.org/C120314980","wikidata":"https://www.wikidata.org/wiki/Q180634","display_name":"Distributed computing","level":1,"score":0.2971000075340271},{"id":"https://openalex.org/C115903868","wikidata":"https://www.wikidata.org/wiki/Q80993","display_name":"Software engineering","level":1,"score":0.2944999933242798},{"id":"https://openalex.org/C182449105","wikidata":"https://www.wikidata.org/wiki/Q3099732","display_name":"Technical report","level":2,"score":0.29409998655319214},{"id":"https://openalex.org/C113364801","wikidata":"https://www.wikidata.org/wiki/Q26674","display_name":"High fidelity","level":2,"score":0.2919999957084656},{"id":"https://openalex.org/C49937458","wikidata":"https://www.wikidata.org/wiki/Q2599292","display_name":"Probabilistic logic","level":2,"score":0.26739999651908875},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.25839999318122864},{"id":"https://openalex.org/C118524514","wikidata":"https://www.wikidata.org/wiki/Q173212","display_name":"Computer architecture","level":1,"score":0.25699999928474426},{"id":"https://openalex.org/C13736549","wikidata":"https://www.wikidata.org/wiki/Q4489420","display_name":"Industrial engineering","level":1,"score":0.25049999356269836}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2605.13565","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2605.13565","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2605.13565","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2605.13565","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[{"display_name":"Sustainable cities and communities","id":"https://metadata.un.org/sdg/11","score":0.6530824899673462}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"We":[0,104],"present":[1,105],"Qwen-Image-VAE-2.0,":[2],"a":[3,52,106,124,128,181],"suite":[4],"of":[5,25,48,67,109,131],"high-compression":[6,175],"Variational":[7],"Autoencoders":[8],"(VAEs)":[9],"that":[10],"achieve":[11],"significant":[12],"advances":[13],"in":[14,59,118,148],"both":[15,149],"reconstruction":[16,23,113,143],"fidelity":[17],"and":[18,38,50,96,152,189],"diffusability.":[19,191],"To":[20,62,88,115],"address":[21],"the":[22,64,80],"bottlenecks":[24],"high":[26,156,185],"compression,":[27,186],"we":[28,43,71,92,121],"adopt":[29],"an":[30,73,94],"improved":[31],"architecture":[32],"featuring":[33],"Global":[34],"Skip":[35],"Connections":[36],"(GSC)":[37],"expanded":[39],"latent":[40,69,81],"channels.":[41],"Moreover,":[42],"scale":[44],"training":[45],"to":[46,56,78,85,100,173],"billions":[47],"images":[49],"incorporate":[51],"synthetic":[53],"rendering":[54],"engine":[55],"improve":[57],"performance":[58,117],"text-rich":[60,119,153],"scenarios.":[61],"tackle":[63],"convergence":[65,171],"challenges":[66],"high-dimensional":[68],"space,":[70],"implement":[72],"enhanced":[74],"semantic":[75],"alignment":[76],"strategy":[77],"make":[79],"space":[82],"highly":[83],"amenable":[84],"diffusion":[86],"modeling.":[87],"optimize":[89],"computational":[90],"efficiency,":[91],"leverage":[93],"asymmetric":[95],"attention-free":[97],"encoder-decoder":[98],"backbone":[99],"minimize":[101],"encoding":[102],"overhead.":[103],"comprehensive":[107],"evaluation":[108,138],"Qwen-Image-VAE-2.0":[110,140,179],"on":[111],"public":[112],"benchmarks.":[114],"evaluate":[116],"scenarios,":[120],"propose":[122],"OmniDoc-TokenBench,":[123],"new":[125],"benchmark":[126],"comprising":[127],"diverse":[129],"collection":[130],"real-world":[132],"documents":[133],"coupled":[134],"with":[135,184],"specialized":[136],"OCR-based":[137],"metrics.":[139],"achieves":[141],"state-of-the-art":[142],"performance,":[144],"demonstrating":[145],"exceptional":[146,190],"capabilities":[147],"general":[150],"domains":[151],"scenarios":[154],"at":[155],"compression":[157],"ratio.":[158],"Furthermore,":[159],"downstream":[160],"DiT":[161],"experiments":[162],"reveal":[163],"our":[164],"models":[165],"possess":[166],"superior":[167,187],"diffusability,":[168],"significantly":[169],"accelerating":[170],"compared":[172],"existing":[174],"baselines.":[176],"These":[177],"establish":[178],"as":[180],"leading":[182],"model":[183],"reconstruction,":[188]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2026-05-15T00:00:00"}
