{"id":"https://openalex.org/W7134005669","doi":"https://doi.org/10.48550/arxiv.2603.04291","title":"CubeComposer: Spatio-Temporal Autoregressive 4K 360\u00b0 Video Generation from Perspective Video","display_name":"CubeComposer: Spatio-Temporal Autoregressive 4K 360\u00b0 Video Generation from Perspective Video","publication_year":2026,"publication_date":"2026-03-04","ids":{"openalex":"https://openalex.org/W7134005669","doi":"https://doi.org/10.48550/arxiv.2603.04291"},"language":null,"primary_location":{"id":"pmh:doi:10.48550/arxiv.2603.04291","is_oa":true,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S4406922384","display_name":"Open MIND","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"publisher-specific-oa","license_id":"https://openalex.org/licenses/publisher-specific-oa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":null,"any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5082484068","display_name":"Lingen Li","orcid":"https://orcid.org/0000-0002-1313-8717"},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Li, Lingen","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5128199972","display_name":"Guangzhi Wang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wang, Guangzhi","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5128161316","display_name":"Xiaoyu Li","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Li, Xiaoyu","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5128194272","display_name":"Zhaoyang Zhang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhang, Zhaoyang","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5128194952","display_name":"Qi Dou","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Dou, Qi","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5122606626","display_name":"Jinwei Gu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Gu, Jinwei","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5128143631","display_name":"Tianfan Xue","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Xue, Tianfan","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5128162425","display_name":"Ying Shan","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Shan, Ying","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":8,"corresponding_author_ids":["https://openalex.org/A5082484068"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10775","display_name":"Generative Adversarial Networks and Image Synthesis","score":0.58160001039505,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10775","display_name":"Generative Adversarial Networks and Image Synthesis","score":0.58160001039505,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11448","display_name":"Face recognition and analysis","score":0.17870000004768372,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11105","display_name":"Advanced Image Processing Techniques","score":0.12110000103712082,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/perspective","display_name":"Perspective (graphical)","score":0.6931999921798706},{"id":"https://openalex.org/keywords/autoregressive-model","display_name":"Autoregressive model","score":0.6790000200271606},{"id":"https://openalex.org/keywords/context","display_name":"Context (archaeology)","score":0.6276000142097473},{"id":"https://openalex.org/keywords/benchmark","display_name":"Benchmark (surveying)","score":0.5421000123023987},{"id":"https://openalex.org/keywords/face","display_name":"Face (sociological concept)","score":0.5181999802589417},{"id":"https://openalex.org/keywords/virtual-reality","display_name":"Virtual reality","score":0.44670000672340393},{"id":"https://openalex.org/keywords/cube","display_name":"Cube (algebra)","score":0.43369999527931213}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8361999988555908},{"id":"https://openalex.org/C12713177","wikidata":"https://www.wikidata.org/wiki/Q1900281","display_name":"Perspective (graphical)","level":2,"score":0.6931999921798706},{"id":"https://openalex.org/C159877910","wikidata":"https://www.wikidata.org/wiki/Q2202883","display_name":"Autoregressive model","level":2,"score":0.6790000200271606},{"id":"https://openalex.org/C2779343474","wikidata":"https://www.wikidata.org/wiki/Q3109175","display_name":"Context (archaeology)","level":2,"score":0.6276000142097473},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5583000183105469},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.5421000123023987},{"id":"https://openalex.org/C2779304628","wikidata":"https://www.wikidata.org/wiki/Q3503480","display_name":"Face (sociological concept)","level":2,"score":0.5181999802589417},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.4925999939441681},{"id":"https://openalex.org/C194969405","wikidata":"https://www.wikidata.org/wiki/Q170519","display_name":"Virtual reality","level":2,"score":0.44670000672340393},{"id":"https://openalex.org/C53051483","wikidata":"https://www.wikidata.org/wiki/Q861555","display_name":"Cube (algebra)","level":2,"score":0.43369999527931213},{"id":"https://openalex.org/C2781020372","wikidata":"https://www.wikidata.org/wiki/Q533093","display_name":"On the fly","level":2,"score":0.4162999987602234},{"id":"https://openalex.org/C36464697","wikidata":"https://www.wikidata.org/wiki/Q451553","display_name":"Visualization","level":2,"score":0.4066999852657318},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.3138999938964844},{"id":"https://openalex.org/C62354387","wikidata":"https://www.wikidata.org/wiki/Q875399","display_name":"Boundary (topology)","level":2,"score":0.3098999857902527},{"id":"https://openalex.org/C107457646","wikidata":"https://www.wikidata.org/wiki/Q207434","display_name":"Human\u2013computer interaction","level":1,"score":0.2994000017642975},{"id":"https://openalex.org/C138268822","wikidata":"https://www.wikidata.org/wiki/Q1051925","display_name":"Resolution (logic)","level":2,"score":0.27489998936653137},{"id":"https://openalex.org/C99498987","wikidata":"https://www.wikidata.org/wiki/Q2210247","display_name":"Noise (video)","level":3,"score":0.2612000107765198},{"id":"https://openalex.org/C121684516","wikidata":"https://www.wikidata.org/wiki/Q7600677","display_name":"Computer graphics (images)","level":1,"score":0.2565000057220459},{"id":"https://openalex.org/C98045186","wikidata":"https://www.wikidata.org/wiki/Q205663","display_name":"Process (computing)","level":2,"score":0.2563999891281128},{"id":"https://openalex.org/C774472","wikidata":"https://www.wikidata.org/wiki/Q6760393","display_name":"Margin (machine learning)","level":2,"score":0.25600001215934753}],"mesh":[],"locations_count":2,"locations":[{"id":"pmh:doi:10.48550/arxiv.2603.04291","is_oa":true,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S4406922384","display_name":"Open MIND","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"publisher-specific-oa","license_id":"https://openalex.org/licenses/publisher-specific-oa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Article"},{"id":"doi:10.48550/arxiv.2603.04291","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.04291","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"pmh:doi:10.48550/arxiv.2603.04291","is_oa":true,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S4406922384","display_name":"Open MIND","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"publisher-specific-oa","license_id":"https://openalex.org/licenses/publisher-specific-oa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Generating":[0],"high-quality":[1],"360\u00b0":[2,67,110],"panoramic":[3],"videos":[4,20,71],"from":[5],"perspective":[6],"input":[7],"is":[8],"one":[9],"of":[10,34],"the":[11],"crucial":[12],"applications":[13],"for":[14,24,119],"virtual":[15],"reality":[16],"(VR),":[17],"whereby":[18],"high-resolution":[19,92],"are":[21,29],"especially":[22],"important":[23],"immersive":[25],"experience.":[26],"Existing":[27],"methods":[28,164],"constrained":[30],"by":[31],"computational":[32],"limitations":[33],"vanilla":[35],"diffusion":[36,61],"models,":[37],"only":[38],"supporting":[39,171],"$\\leq$":[40],"1K":[41],"resolution":[42,167],"native":[43,166],"generation":[44,112],"and":[45,116,139,148,168],"relying":[46],"on":[47,156],"suboptimal":[48],"post":[49],"super-resolution":[50],"to":[51,95,136,150],"increase":[52],"resolution.":[53],"We":[54],"introduce":[55],"CubeComposer,":[56],"a":[57,83,104,123,131],"novel":[58],"spatio-temporal":[59,85,105],"autoregressive":[60,106],"model":[62],"that":[63,108,160],"natively":[64],"generates":[65],"4K-resolution":[66],"videos.":[68],"By":[69],"decomposing":[70],"into":[72],"cubemap":[73],"representations":[74],"with":[75,130],"six":[76],"faces,":[77],"CubeComposer":[78,161],"autoregressively":[79],"synthesizes":[80],"content":[81],"in":[82,98,165],"well-planned":[84],"order,":[86],"reducing":[87],"memory":[88],"demands":[89],"while":[90],"enabling":[91],"output.":[93],"Specifically,":[94],"address":[96],"challenges":[97],"multi-dimensional":[99],"autoregression,":[100],"we":[101],"propose:":[102],"(1)":[103],"strategy":[107],"orchestrates":[109],"video":[111],"across":[113],"cube":[114,124],"faces":[115],"time":[117],"windows":[118],"coherent":[120],"synthesis;":[121],"(2)":[122],"face":[125],"context":[126,133],"management":[127],"mechanism,":[128],"equipped":[129],"sparse":[132],"attention":[134],"design":[135],"improve":[137],"efficiency;":[138],"(3)":[140],"continuity-aware":[141],"techniques,":[142],"including":[143],"cube-aware":[144],"positional":[145],"encoding,":[146],"padding,":[147],"blending":[149],"eliminate":[151],"boundary":[152],"seams.":[153],"Extensive":[154],"experiments":[155],"benchmark":[157],"datasets":[158],"demonstrate":[159],"outperforms":[162],"state-of-the-art":[163],"visual":[169],"quality,":[170],"practical":[172],"VR":[173],"application":[174],"scenarios.":[175],"Project":[176],"page:":[177],"https://lg-li.github.io/project/cubecomposer":[178]},"counts_by_year":[],"updated_date":"2026-04-04T16:13:02.066488","created_date":"2026-03-06T00:00:00"}
