{"id":"https://openalex.org/W7129469189","doi":"https://doi.org/10.48550/arxiv.2602.13818","title":"VAR-3D: View-aware Auto-Regressive Model for Text-to-3D Generation via a 3D Tokenizer","display_name":"VAR-3D: View-aware Auto-Regressive Model for Text-to-3D Generation via a 3D Tokenizer","publication_year":2026,"publication_date":"2026-02-14","ids":{"openalex":"https://openalex.org/W7129469189","doi":"https://doi.org/10.48550/arxiv.2602.13818"},"language":null,"primary_location":{"id":"pmh:doi:10.48550/arxiv.2602.13818","is_oa":true,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S4406922384","display_name":"Open MIND","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"publisher-specific-oa","license_id":"https://openalex.org/licenses/publisher-specific-oa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":null,"any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5126217787","display_name":"Zongcheng Han","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Han, Zongcheng","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5126217449","display_name":"Dongyan Cao","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Cao, Dongyan","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5126268343","display_name":"Haoran Sun","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Sun, Haoran","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5126181324","display_name":"Yu Hong","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Hong, Yu","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":4,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10775","display_name":"Generative Adversarial Networks and Image Synthesis","score":0.4433000087738037,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10775","display_name":"Generative Adversarial Networks and Image Synthesis","score":0.4433000087738037,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10719","display_name":"3D Shape Modeling and Analysis","score":0.30889999866485596,"subfield":{"id":"https://openalex.org/subfields/2206","display_name":"Computational Mechanics"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12290","display_name":"Human Motion and Animation","score":0.06679999828338623,"subfield":{"id":"https://openalex.org/subfields/2207","display_name":"Control and Systems Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/view-synthesis","display_name":"View synthesis","score":0.6021999716758728},{"id":"https://openalex.org/keywords/autoencoder","display_name":"Autoencoder","score":0.499099999666214},{"id":"https://openalex.org/keywords/vector-quantization","display_name":"Vector quantization","score":0.44920000433921814},{"id":"https://openalex.org/keywords/generative-grammar","display_name":"Generative grammar","score":0.43970000743865967},{"id":"https://openalex.org/keywords/fidelity","display_name":"Fidelity","score":0.39980000257492065},{"id":"https://openalex.org/keywords/consistency","display_name":"Consistency (knowledge bases)","score":0.3741999864578247},{"id":"https://openalex.org/keywords/process","display_name":"Process (computing)","score":0.3718999922275543},{"id":"https://openalex.org/keywords/visualization","display_name":"Visualization","score":0.36660000681877136}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.64410001039505},{"id":"https://openalex.org/C2776449333","wikidata":"https://www.wikidata.org/wiki/Q7928781","display_name":"View synthesis","level":3,"score":0.6021999716758728},{"id":"https://openalex.org/C101738243","wikidata":"https://www.wikidata.org/wiki/Q786435","display_name":"Autoencoder","level":3,"score":0.499099999666214},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.47209998965263367},{"id":"https://openalex.org/C199833920","wikidata":"https://www.wikidata.org/wiki/Q612536","display_name":"Vector quantization","level":2,"score":0.44920000433921814},{"id":"https://openalex.org/C39890363","wikidata":"https://www.wikidata.org/wiki/Q36108","display_name":"Generative grammar","level":2,"score":0.43970000743865967},{"id":"https://openalex.org/C2776459999","wikidata":"https://www.wikidata.org/wiki/Q2119376","display_name":"Fidelity","level":2,"score":0.39980000257492065},{"id":"https://openalex.org/C2776436953","wikidata":"https://www.wikidata.org/wiki/Q5163215","display_name":"Consistency (knowledge bases)","level":2,"score":0.3741999864578247},{"id":"https://openalex.org/C98045186","wikidata":"https://www.wikidata.org/wiki/Q205663","display_name":"Process (computing)","level":2,"score":0.3718999922275543},{"id":"https://openalex.org/C36464697","wikidata":"https://www.wikidata.org/wiki/Q451553","display_name":"Visualization","level":2,"score":0.36660000681877136},{"id":"https://openalex.org/C167966045","wikidata":"https://www.wikidata.org/wiki/Q5532625","display_name":"Generative model","level":3,"score":0.35830000042915344},{"id":"https://openalex.org/C28855332","wikidata":"https://www.wikidata.org/wiki/Q198099","display_name":"Quantization (signal processing)","level":2,"score":0.33660000562667847},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.33250001072883606},{"id":"https://openalex.org/C126780896","wikidata":"https://www.wikidata.org/wiki/Q899871","display_name":"Distortion (music)","level":4,"score":0.32519999146461487},{"id":"https://openalex.org/C48145219","wikidata":"https://www.wikidata.org/wiki/Q1335365","display_name":"Security token","level":2,"score":0.3174999952316284},{"id":"https://openalex.org/C2781181686","wikidata":"https://www.wikidata.org/wiki/Q4226068","display_name":"Coherence (philosophical gambling strategy)","level":2,"score":0.3100999891757965},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.2930999994277954},{"id":"https://openalex.org/C118505674","wikidata":"https://www.wikidata.org/wiki/Q42586063","display_name":"Encoder","level":2,"score":0.2849999964237213},{"id":"https://openalex.org/C113364801","wikidata":"https://www.wikidata.org/wiki/Q26674","display_name":"High fidelity","level":2,"score":0.2842999994754791},{"id":"https://openalex.org/C3019007443","wikidata":"https://www.wikidata.org/wiki/Q568742","display_name":"3d model","level":2,"score":0.2840999960899353},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.27950000762939453},{"id":"https://openalex.org/C59404180","wikidata":"https://www.wikidata.org/wiki/Q17013334","display_name":"Feature learning","level":2,"score":0.2791000008583069},{"id":"https://openalex.org/C80444323","wikidata":"https://www.wikidata.org/wiki/Q2878974","display_name":"Theoretical computer science","level":1,"score":0.27390000224113464},{"id":"https://openalex.org/C2779530757","wikidata":"https://www.wikidata.org/wiki/Q1207505","display_name":"Quality (philosophy)","level":2,"score":0.27219998836517334}],"mesh":[],"locations_count":2,"locations":[{"id":"pmh:doi:10.48550/arxiv.2602.13818","is_oa":true,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S4406922384","display_name":"Open MIND","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"publisher-specific-oa","license_id":"https://openalex.org/licenses/publisher-specific-oa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Article"},{"id":"doi:10.48550/arxiv.2602.13818","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2602.13818","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"pmh:doi:10.48550/arxiv.2602.13818","is_oa":true,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S4406922384","display_name":"Open MIND","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"publisher-specific-oa","license_id":"https://openalex.org/licenses/publisher-specific-oa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Recent":[0],"advances":[1],"in":[2,9,21,147],"auto-regressive":[3,74],"transformers":[4],"have":[5],"achieved":[6],"remarkable":[7],"success":[8],"generative":[10,124],"modeling.":[11],"However,":[12],"text-to-3D":[13],"generation":[14,149],"remains":[15],"challenging,":[16],"primarily":[17],"due":[18],"to":[19,95,126,135],"bottlenecks":[20],"learning":[22],"discrete":[23,105,116],"3D":[24,58,84,90,102],"representations.":[25],"Specifically,":[26],"existing":[27,145],"approaches":[28],"often":[29],"suffer":[30],"from":[31],"information":[32],"loss":[33],"during":[34],"encoding,":[35],"causing":[36],"representational":[37],"distortion":[38],"before":[39],"the":[40,53,61,97,123,136],"quantization":[41],"process.":[42],"This":[43],"effect":[44],"is":[45],"further":[46],"amplified":[47],"by":[48],"vector":[49],"quantization,":[50],"ultimately":[51],"degrading":[52],"geometric":[54,99],"coherence":[55],"of":[56,101],"text-conditioned":[57,73],"shapes.":[59],"Moreover,":[60],"conventional":[62],"two-stage":[63],"training":[64,112],"paradigm":[65],"induces":[66],"an":[67],"objective":[68],"mismatch":[69],"between":[70],"reconstruction":[71],"and":[72,131,151],"generation.":[75],"To":[76],"address":[77],"these":[78],"issues,":[79],"we":[80,108],"propose":[81],"View-aware":[82],"Auto-Regressive":[83],"(VAR-3D),":[85],"which":[86],"intergrates":[87],"a":[88,110],"view-aware":[89],"Vector":[91],"Quantized-Variational":[92],"AutoEncoder":[93],"(VQ-VAE)":[94],"convert":[96],"complex":[98],"structure":[100],"models":[103],"into":[104],"tokens.":[106],"Additionally,":[107],"introduce":[109],"rendering-supervised":[111],"strategy":[113],"that":[114,141],"couples":[115],"token":[117],"prediction":[118],"with":[119],"visual":[120,129],"reconstruction,":[121],"encouraging":[122],"process":[125],"better":[127],"preserve":[128],"fidelity":[130],"structural":[132],"consistency":[133],"relative":[134],"input":[137],"text.":[138],"Experiments":[139],"demonstrate":[140],"VAR-3D":[142],"significantly":[143],"outperforms":[144],"methods":[146],"both":[148],"quality":[150],"text-3D":[152],"alignment.":[153]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2026-02-18T00:00:00"}
