{"id":"https://openalex.org/W4416999604","doi":"https://doi.org/10.1145/3763330","title":"Voyager: Long-Range and World-Consistent Video Diffusion for Explorable 3D Scene Generation","display_name":"Voyager: Long-Range and World-Consistent Video Diffusion for Explorable 3D Scene Generation","publication_year":2025,"publication_date":"2025-12-01","ids":{"openalex":"https://openalex.org/W4416999604","doi":"https://doi.org/10.1145/3763330"},"language":"en","primary_location":{"id":"doi:10.1145/3763330","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3763330","pdf_url":null,"source":{"id":"https://openalex.org/S185367456","display_name":"ACM Transactions on Graphics","issn_l":"0730-0301","issn":["0730-0301","1557-7368"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM Transactions on Graphics","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5001048042","display_name":"Tianyu Huang","orcid":"https://orcid.org/0009-0002-1071-6371"},"institutions":[{"id":"https://openalex.org/I168719708","display_name":"City University of Hong Kong","ror":"https://ror.org/03q8dnn23","country_code":"HK","type":"education","lineage":["https://openalex.org/I168719708"]},{"id":"https://openalex.org/I204983213","display_name":"Harbin Institute of Technology","ror":"https://ror.org/01yqg2h08","country_code":"CN","type":"education","lineage":["https://openalex.org/I204983213"]}],"countries":["CN","HK"],"is_corresponding":true,"raw_author_name":"Tianyu Huang","raw_affiliation_strings":["City University of Hong Kong, Hong Kong, Hong Kong","Harbin Institute of Technology, Harbin, China"],"affiliations":[{"raw_affiliation_string":"City University of Hong Kong, Hong Kong, Hong Kong","institution_ids":["https://openalex.org/I168719708"]},{"raw_affiliation_string":"Harbin Institute of Technology, Harbin, China","institution_ids":["https://openalex.org/I204983213"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5102640354","display_name":"Wangguandong Zheng","orcid":"https://orcid.org/0009-0006-0919-1191"},"institutions":[{"id":"https://openalex.org/I2250653659","display_name":"Tencent (China)","ror":"https://ror.org/00hhjss72","country_code":"CN","type":"company","lineage":["https://openalex.org/I2250653659"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Wangguandong Zheng","raw_affiliation_strings":["Tencent, Shenzhen, China"],"affiliations":[{"raw_affiliation_string":"Tencent, Shenzhen, China","institution_ids":["https://openalex.org/I2250653659"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5102701424","display_name":"Tengfei Wang","orcid":"https://orcid.org/0000-0002-3435-8110"},"institutions":[{"id":"https://openalex.org/I2250653659","display_name":"Tencent (China)","ror":"https://ror.org/00hhjss72","country_code":"CN","type":"company","lineage":["https://openalex.org/I2250653659"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Tengfei Wang","raw_affiliation_strings":["Tencent, Shenzhen, China"],"affiliations":[{"raw_affiliation_string":"Tencent, Shenzhen, China","institution_ids":["https://openalex.org/I2250653659"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100366703","display_name":"Yuhao Liu","orcid":"https://orcid.org/0000-0003-0550-4788"},"institutions":[{"id":"https://openalex.org/I168719708","display_name":"City University of Hong Kong","ror":"https://ror.org/03q8dnn23","country_code":"HK","type":"education","lineage":["https://openalex.org/I168719708"]}],"countries":["HK"],"is_corresponding":false,"raw_author_name":"Yuhao Liu","raw_affiliation_strings":["City University of Hong Kong, Hong Kong, Hong Kong"],"affiliations":[{"raw_affiliation_string":"City University of Hong Kong, Hong Kong, Hong Kong","institution_ids":["https://openalex.org/I168719708"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5076699819","display_name":"Zhenwei Wang","orcid":"https://orcid.org/0000-0003-0215-660X"},"institutions":[{"id":"https://openalex.org/I2250653659","display_name":"Tencent (China)","ror":"https://ror.org/00hhjss72","country_code":"CN","type":"company","lineage":["https://openalex.org/I2250653659"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Zhenwei Wang","raw_affiliation_strings":["Tencent, Shenzhen, China"],"affiliations":[{"raw_affiliation_string":"Tencent, Shenzhen, China","institution_ids":["https://openalex.org/I2250653659"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5075919012","display_name":"Junta Wu","orcid":"https://orcid.org/0009-0009-3928-4606"},"institutions":[{"id":"https://openalex.org/I2250653659","display_name":"Tencent (China)","ror":"https://ror.org/00hhjss72","country_code":"CN","type":"company","lineage":["https://openalex.org/I2250653659"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Junta Wu","raw_affiliation_strings":["Tencent, Shenzhen, China"],"affiliations":[{"raw_affiliation_string":"Tencent, Shenzhen, China","institution_ids":["https://openalex.org/I2250653659"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101944040","display_name":"Jie Jiang","orcid":"https://orcid.org/0000-0001-7019-2077"},"institutions":[{"id":"https://openalex.org/I2250653659","display_name":"Tencent (China)","ror":"https://ror.org/00hhjss72","country_code":"CN","type":"company","lineage":["https://openalex.org/I2250653659"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jie Jiang","raw_affiliation_strings":["Tencent, Shenzhen, China"],"affiliations":[{"raw_affiliation_string":"Tencent, Shenzhen, China","institution_ids":["https://openalex.org/I2250653659"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5065859286","display_name":"Hui Li","orcid":"https://orcid.org/0000-0001-9198-3951"},"institutions":[{"id":"https://openalex.org/I204983213","display_name":"Harbin Institute of Technology","ror":"https://ror.org/01yqg2h08","country_code":"CN","type":"education","lineage":["https://openalex.org/I204983213"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Hui Li","raw_affiliation_strings":["Harbin Institute of Technology, Harbin, China"],"affiliations":[{"raw_affiliation_string":"Harbin Institute of Technology, Harbin, China","institution_ids":["https://openalex.org/I204983213"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5006197413","display_name":"Rynson W. H. Lau","orcid":"https://orcid.org/0000-0002-8957-8129"},"institutions":[{"id":"https://openalex.org/I168719708","display_name":"City University of Hong Kong","ror":"https://ror.org/03q8dnn23","country_code":"HK","type":"education","lineage":["https://openalex.org/I168719708"]}],"countries":["HK"],"is_corresponding":false,"raw_author_name":"Rynson Lau","raw_affiliation_strings":["City University of Hong Kong, Hong Kong, Hong Kong"],"affiliations":[{"raw_affiliation_string":"City University of Hong Kong, Hong Kong, Hong Kong","institution_ids":["https://openalex.org/I168719708"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100636655","display_name":"Wangmeng Zuo","orcid":"https://orcid.org/0000-0002-3330-783X"},"institutions":[{"id":"https://openalex.org/I204983213","display_name":"Harbin Institute of Technology","ror":"https://ror.org/01yqg2h08","country_code":"CN","type":"education","lineage":["https://openalex.org/I204983213"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Wangmeng Zuo","raw_affiliation_strings":["Harbin Institute of Technology, Harbin, China"],"affiliations":[{"raw_affiliation_string":"Harbin Institute of Technology, Harbin, China","institution_ids":["https://openalex.org/I204983213"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5034317074","display_name":"Chunchao Guo","orcid":"https://orcid.org/0009-0001-7465-802X"},"institutions":[{"id":"https://openalex.org/I2250653659","display_name":"Tencent (China)","ror":"https://ror.org/00hhjss72","country_code":"CN","type":"company","lineage":["https://openalex.org/I2250653659"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Chunchao Guo","raw_affiliation_strings":["Tencent, Shenzhen, China"],"affiliations":[{"raw_affiliation_string":"Tencent, Shenzhen, China","institution_ids":["https://openalex.org/I2250653659"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":11,"corresponding_author_ids":["https://openalex.org/A5001048042"],"corresponding_institution_ids":["https://openalex.org/I168719708","https://openalex.org/I204983213"],"apc_list":null,"apc_paid":null,"fwci":1.2784,"has_fulltext":false,"cited_by_count":1,"citation_normalized_percentile":{"value":0.86213899,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":91,"max":95},"biblio":{"volume":"44","issue":"6","first_page":"1","last_page":"15"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10531","display_name":"Advanced Vision and Imaging","score":0.4959000051021576,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10531","display_name":"Advanced Vision and Imaging","score":0.4959000051021576,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10775","display_name":"Generative Adversarial Networks and Image Synthesis","score":0.20800000429153442,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10719","display_name":"3D Shape Modeling and Analysis","score":0.09040000289678574,"subfield":{"id":"https://openalex.org/subfields/2206","display_name":"Computational Mechanics"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/view-synthesis","display_name":"View synthesis","score":0.4797999858856201},{"id":"https://openalex.org/keywords/3d-reconstruction","display_name":"3D reconstruction","score":0.43950000405311584},{"id":"https://openalex.org/keywords/pipeline","display_name":"Pipeline (software)","score":0.4214000105857849},{"id":"https://openalex.org/keywords/video-tracking","display_name":"Video tracking","score":0.40610000491142273},{"id":"https://openalex.org/keywords/point-cloud","display_name":"Point cloud","score":0.4018000066280365},{"id":"https://openalex.org/keywords/rgb-color-model","display_name":"RGB color model","score":0.38769999146461487},{"id":"https://openalex.org/keywords/interpolation","display_name":"Interpolation (computer graphics)","score":0.37959998846054077},{"id":"https://openalex.org/keywords/iterative-reconstruction","display_name":"Iterative reconstruction","score":0.36250001192092896},{"id":"https://openalex.org/keywords/perspective","display_name":"Perspective (graphical)","score":0.3418999910354614}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8187999725341797},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.7075999975204468},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6658999919891357},{"id":"https://openalex.org/C2776449333","wikidata":"https://www.wikidata.org/wiki/Q7928781","display_name":"View synthesis","level":3,"score":0.4797999858856201},{"id":"https://openalex.org/C109950114","wikidata":"https://www.wikidata.org/wiki/Q4464732","display_name":"3D reconstruction","level":2,"score":0.43950000405311584},{"id":"https://openalex.org/C43521106","wikidata":"https://www.wikidata.org/wiki/Q2165493","display_name":"Pipeline (software)","level":2,"score":0.4214000105857849},{"id":"https://openalex.org/C121684516","wikidata":"https://www.wikidata.org/wiki/Q7600677","display_name":"Computer graphics (images)","level":1,"score":0.420199990272522},{"id":"https://openalex.org/C202474056","wikidata":"https://www.wikidata.org/wiki/Q1931635","display_name":"Video tracking","level":3,"score":0.40610000491142273},{"id":"https://openalex.org/C131979681","wikidata":"https://www.wikidata.org/wiki/Q1899648","display_name":"Point cloud","level":2,"score":0.4018000066280365},{"id":"https://openalex.org/C82990744","wikidata":"https://www.wikidata.org/wiki/Q166194","display_name":"RGB color model","level":2,"score":0.38769999146461487},{"id":"https://openalex.org/C137800194","wikidata":"https://www.wikidata.org/wiki/Q11713455","display_name":"Interpolation (computer graphics)","level":3,"score":0.37959998846054077},{"id":"https://openalex.org/C141379421","wikidata":"https://www.wikidata.org/wiki/Q6094427","display_name":"Iterative reconstruction","level":2,"score":0.36250001192092896},{"id":"https://openalex.org/C12713177","wikidata":"https://www.wikidata.org/wiki/Q1900281","display_name":"Perspective (graphical)","level":2,"score":0.3418999910354614},{"id":"https://openalex.org/C23431618","wikidata":"https://www.wikidata.org/wiki/Q1404672","display_name":"Multiview Video Coding","level":4,"score":0.3271999955177307},{"id":"https://openalex.org/C2777897806","wikidata":"https://www.wikidata.org/wiki/Q568742","display_name":"3D modeling","level":2,"score":0.32260000705718994},{"id":"https://openalex.org/C194969405","wikidata":"https://www.wikidata.org/wiki/Q170519","display_name":"Virtual reality","level":2,"score":0.31769999861717224},{"id":"https://openalex.org/C84824328","wikidata":"https://www.wikidata.org/wiki/Q4633097","display_name":"2D to 3D conversion","level":3,"score":0.3165999948978424},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.2946000099182129},{"id":"https://openalex.org/C65483669","wikidata":"https://www.wikidata.org/wiki/Q3536669","display_name":"Video processing","level":2,"score":0.29330000281333923},{"id":"https://openalex.org/C128840427","wikidata":"https://www.wikidata.org/wiki/Q1302174","display_name":"Motion compensation","level":2,"score":0.28679999709129333},{"id":"https://openalex.org/C10161872","wikidata":"https://www.wikidata.org/wiki/Q557891","display_name":"Motion estimation","level":2,"score":0.28600001335144043},{"id":"https://openalex.org/C176217482","wikidata":"https://www.wikidata.org/wiki/Q860554","display_name":"Metric (unit)","level":2,"score":0.2777000069618225},{"id":"https://openalex.org/C2781181686","wikidata":"https://www.wikidata.org/wiki/Q4226068","display_name":"Coherence (philosophical gambling strategy)","level":2,"score":0.2718999981880188},{"id":"https://openalex.org/C108882727","wikidata":"https://www.wikidata.org/wiki/Q2991685","display_name":"Solid modeling","level":2,"score":0.2689000070095062},{"id":"https://openalex.org/C153715457","wikidata":"https://www.wikidata.org/wiki/Q254183","display_name":"Augmented reality","level":2,"score":0.26010000705718994},{"id":"https://openalex.org/C36464697","wikidata":"https://www.wikidata.org/wiki/Q451553","display_name":"Visualization","level":2,"score":0.25519999861717224},{"id":"https://openalex.org/C115537543","wikidata":"https://www.wikidata.org/wiki/Q165596","display_name":"Cache","level":2,"score":0.2542000114917755},{"id":"https://openalex.org/C76935873","wikidata":"https://www.wikidata.org/wiki/Q209121","display_name":"Image sensor","level":2,"score":0.2533000111579895},{"id":"https://openalex.org/C2776214188","wikidata":"https://www.wikidata.org/wiki/Q408386","display_name":"Inference","level":2,"score":0.2529999911785126},{"id":"https://openalex.org/C146159030","wikidata":"https://www.wikidata.org/wiki/Q7625099","display_name":"Structure from motion","level":3,"score":0.25049999356269836}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3763330","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3763330","pdf_url":null,"source":{"id":"https://openalex.org/S185367456","display_name":"ACM Transactions on Graphics","issn_l":"0730-0301","issn":["0730-0301","1557-7368"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM Transactions on Graphics","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":13,"referenced_works":["https://openalex.org/W1522734439","https://openalex.org/W2738551266","https://openalex.org/W4200150166","https://openalex.org/W4312933868","https://openalex.org/W4382240211","https://openalex.org/W4385318467","https://openalex.org/W4386071957","https://openalex.org/W4400573519","https://openalex.org/W4402667896","https://openalex.org/W4402703008","https://openalex.org/W4402775760","https://openalex.org/W4413146013","https://openalex.org/W4413147574"],"related_works":[],"abstract_inverted_index":{"Real-world":[0],"applications":[1],"like":[2],"video":[3,59,124,153,169],"gaming":[4],"and":[5,47,84,122,147,162,177,208],"virtual":[6],"reality":[7],"often":[8],"demand":[9],"the":[10,92],"ability":[11],"to":[12,131],"model":[13],"3D":[14,32,42,65,95,192],"scenes":[15,43],"that":[16,62,117,172],"users":[17],"can":[18],"explore":[19],"along":[20],"custom":[21],"camera":[22,74,174],"trajectories.":[23],"While":[24],"significant":[25],"progress":[26],"has":[27],"been":[28],"made":[29],"in":[30,198,205],"generating":[31],"objects":[33],"from":[34,68],"text":[35],"or":[36,100],"images,":[37],"creating":[38],"long-range,":[39],"3D-consistent,":[40],"explorable":[41],"remains":[44],"a":[45,57,69,199],"complex":[46],"challenging":[48],"problem.":[49],"In":[50],"this":[51,216],"work,":[52],"we":[53],"present":[54],"Voyager":[55,79],",":[56],"novel":[58],"diffusion":[60],"framework":[61],"generates":[63,119],"world-consistent":[64],"point-cloud":[66],"sequences":[67],"single":[70],"image":[71],"with":[72,86,144,151,159,211],"user-defined":[73],"path.":[75],"Unlike":[76],"existing":[77,128,203],"approaches,":[78],"achieves":[80],"end-to-end":[81],"scene":[82,157],"generation":[83],"reconstruction":[85,96,170],"inherent":[87],"consistency":[88],"across":[89],"frames,":[90],"eliminating":[91],"need":[93],"for":[94,155,181,215],"pipelines":[97],"(e.g.,":[98],"structure-from-motion":[99],"multi-view":[101],"stereo).":[102],"Our":[103],"method":[104],"integrates":[105],"three":[106],"key":[107],"components:":[108],"1)":[109],"World-Consistent":[110],"Video":[111],"Diffusion":[112],":":[113,139,167],"A":[114,168],"unified":[115],"architecture":[116],"jointly":[118],"aligned":[120],"RGB":[121],"depth":[123,179],"sequences,":[125],"conditioned":[126],"on":[127],"world":[129,142],"observation":[130],"ensure":[132],"global":[133],"coherence":[134],"2)":[135],"Long-Range":[136],"World":[137],"Exploration":[138],"An":[140],"efficient":[141],"cache":[143],"point":[145],"culling":[146],"an":[148],"auto-regressive":[149],"inference":[150],"smooth":[152],"sampling":[154],"iterative":[156],"extension":[158],"context-aware":[160],"consistency,":[161],"3)":[163],"Scalable":[164],"Data":[165],"Engine":[166],"pipeline":[171],"automates":[173],"pose":[175],"estimation":[176],"metric":[178],"prediction":[180],"arbitrary":[182],"videos,":[183],"enabling":[184],"large-scale,":[185],"diverse":[186],"training":[187],"data":[188],"curation":[189],"without":[190],"manual":[191],"annotations.":[193],"Collectively,":[194],"these":[195],"designs":[196],"result":[197],"clear":[200],"improvement":[201],"over":[202],"methods":[204],"visual":[206],"quality":[207],"geometric":[209],"accuracy,":[210],"versatile":[212],"applications.":[213],"Code":[214],"paper":[217],"are":[218],"at":[219],"https://github.com/Tencent-Hunyuan/HunyuanWorld-Voyager.":[220]},"counts_by_year":[{"year":2025,"cited_by_count":1}],"updated_date":"2026-03-27T05:58:40.876381","created_date":"2025-12-04T00:00:00"}
