{"id":"https://openalex.org/W7110040005","doi":"https://doi.org/10.1145/3757377.3763871","title":"VideoFrom3D: 3D Scene Video Generation via Complementary Image and Video Diffusion Models","display_name":"VideoFrom3D: 3D Scene Video Generation via Complementary Image and Video Diffusion Models","publication_year":2025,"publication_date":"2025-12-08","ids":{"openalex":"https://openalex.org/W7110040005","doi":"https://doi.org/10.1145/3757377.3763871"},"language":null,"primary_location":{"id":"doi:10.1145/3757377.3763871","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3757377.3763871","pdf_url":null,"source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the SIGGRAPH Asia 2025 Conference Papers","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://doi.org/10.1145/3757377.3763871","any_repository_has_fulltext":null},"authorships":[{"author_position":"first","author":{"id":null,"display_name":"Geonung Kim","orcid":"https://orcid.org/0000-0003-0806-6963"},"institutions":[{"id":"https://openalex.org/I123900574","display_name":"Pohang University of Science and Technology","ror":"https://ror.org/04xysgw12","country_code":"KR","type":"education","lineage":["https://openalex.org/I123900574"]}],"countries":["KR"],"is_corresponding":true,"raw_author_name":"Geonung Kim","raw_affiliation_strings":["POSTECH, Pohang, Republic of Korea"],"affiliations":[{"raw_affiliation_string":"POSTECH, Pohang, Republic of Korea","institution_ids":["https://openalex.org/I123900574"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Janghyeok Han","orcid":"https://orcid.org/0009-0000-2287-6263"},"institutions":[{"id":"https://openalex.org/I123900574","display_name":"Pohang University of Science and Technology","ror":"https://ror.org/04xysgw12","country_code":"KR","type":"education","lineage":["https://openalex.org/I123900574"]}],"countries":["KR"],"is_corresponding":false,"raw_author_name":"Janghyeok Han","raw_affiliation_strings":["POSTECH, Pohang, Republic of Korea"],"affiliations":[{"raw_affiliation_string":"POSTECH, Pohang, Republic of Korea","institution_ids":["https://openalex.org/I123900574"]}]},{"author_position":"last","author":{"id":null,"display_name":"Sunghyun Cho","orcid":"https://orcid.org/0000-0001-7627-3513"},"institutions":[{"id":"https://openalex.org/I123900574","display_name":"Pohang University of Science and Technology","ror":"https://ror.org/04xysgw12","country_code":"KR","type":"education","lineage":["https://openalex.org/I123900574"]}],"countries":["KR"],"is_corresponding":false,"raw_author_name":"Sunghyun Cho","raw_affiliation_strings":["POSTECH, Pohang, Republic of Korea"],"affiliations":[{"raw_affiliation_string":"POSTECH, Pohang, Republic of Korea","institution_ids":["https://openalex.org/I123900574"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":[],"corresponding_institution_ids":["https://openalex.org/I123900574"],"apc_list":null,"apc_paid":null,"fwci":1.319,"has_fulltext":false,"cited_by_count":1,"citation_normalized_percentile":{"value":0.86888962,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":91,"max":95},"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"11"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10775","display_name":"Generative Adversarial Networks and Image Synthesis","score":0.8780999779701233,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10775","display_name":"Generative Adversarial Networks and Image Synthesis","score":0.8780999779701233,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10531","display_name":"Advanced Vision and Imaging","score":0.04320000112056732,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11448","display_name":"Face recognition and analysis","score":0.014399999752640724,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/image","display_name":"Image (mathematics)","score":0.489300012588501},{"id":"https://openalex.org/keywords/streamlines-streaklines-and-pathlines","display_name":"Streamlines, streaklines, and pathlines","score":0.48649999499320984},{"id":"https://openalex.org/keywords/generative-model","display_name":"Generative model","score":0.435699999332428},{"id":"https://openalex.org/keywords/diffusion","display_name":"Diffusion","score":0.4343000054359436},{"id":"https://openalex.org/keywords/generative-grammar","display_name":"Generative grammar","score":0.36959999799728394},{"id":"https://openalex.org/keywords/video-tracking","display_name":"Video tracking","score":0.3483999967575073},{"id":"https://openalex.org/keywords/image-processing","display_name":"Image processing","score":0.3082999885082245},{"id":"https://openalex.org/keywords/view-synthesis","display_name":"View synthesis","score":0.30329999327659607}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7930999994277954},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.7516000270843506},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.722599983215332},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.489300012588501},{"id":"https://openalex.org/C60439489","wikidata":"https://www.wikidata.org/wiki/Q634407","display_name":"Streamlines, streaklines, and pathlines","level":2,"score":0.48649999499320984},{"id":"https://openalex.org/C167966045","wikidata":"https://www.wikidata.org/wiki/Q5532625","display_name":"Generative model","level":3,"score":0.435699999332428},{"id":"https://openalex.org/C69357855","wikidata":"https://www.wikidata.org/wiki/Q163214","display_name":"Diffusion","level":2,"score":0.4343000054359436},{"id":"https://openalex.org/C121684516","wikidata":"https://www.wikidata.org/wiki/Q7600677","display_name":"Computer graphics (images)","level":1,"score":0.43230000138282776},{"id":"https://openalex.org/C39890363","wikidata":"https://www.wikidata.org/wiki/Q36108","display_name":"Generative grammar","level":2,"score":0.36959999799728394},{"id":"https://openalex.org/C202474056","wikidata":"https://www.wikidata.org/wiki/Q1931635","display_name":"Video tracking","level":3,"score":0.3483999967575073},{"id":"https://openalex.org/C9417928","wikidata":"https://www.wikidata.org/wiki/Q1070689","display_name":"Image processing","level":3,"score":0.3082999885082245},{"id":"https://openalex.org/C2776449333","wikidata":"https://www.wikidata.org/wiki/Q7928781","display_name":"View synthesis","level":3,"score":0.30329999327659607},{"id":"https://openalex.org/C106030495","wikidata":"https://www.wikidata.org/wiki/Q1797012","display_name":"Video compression picture types","level":4,"score":0.2759000062942505},{"id":"https://openalex.org/C65483669","wikidata":"https://www.wikidata.org/wiki/Q3536669","display_name":"Video processing","level":2,"score":0.2728999853134155},{"id":"https://openalex.org/C2780586882","wikidata":"https://www.wikidata.org/wiki/Q7520643","display_name":"Simple (philosophy)","level":2,"score":0.27140000462532043},{"id":"https://openalex.org/C23431618","wikidata":"https://www.wikidata.org/wiki/Q1404672","display_name":"Multiview Video Coding","level":4,"score":0.2709999978542328},{"id":"https://openalex.org/C119657128","wikidata":"https://www.wikidata.org/wiki/Q11633","display_name":"Photography","level":2,"score":0.26930001378059387},{"id":"https://openalex.org/C2776401178","wikidata":"https://www.wikidata.org/wiki/Q12050496","display_name":"Feature (linguistics)","level":2,"score":0.26589998602867126},{"id":"https://openalex.org/C117090137","wikidata":"https://www.wikidata.org/wiki/Q7927977","display_name":"Video post-processing","level":5,"score":0.258899986743927},{"id":"https://openalex.org/C2777897806","wikidata":"https://www.wikidata.org/wiki/Q568742","display_name":"3D modeling","level":2,"score":0.2563999891281128}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3757377.3763871","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3757377.3763871","pdf_url":null,"source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the SIGGRAPH Asia 2025 Conference Papers","raw_type":"proceedings-article"}],"best_oa_location":{"id":"doi:10.1145/3757377.3763871","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3757377.3763871","pdf_url":null,"source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the SIGGRAPH Asia 2025 Conference Papers","raw_type":"proceedings-article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":20,"referenced_works":["https://openalex.org/W845365781","https://openalex.org/W2955639361","https://openalex.org/W2962785568","https://openalex.org/W3109908659","https://openalex.org/W4214745154","https://openalex.org/W4385318467","https://openalex.org/W4385537219","https://openalex.org/W4386075992","https://openalex.org/W4390871736","https://openalex.org/W4390873054","https://openalex.org/W4390873542","https://openalex.org/W4400582092","https://openalex.org/W4400582299","https://openalex.org/W4400763528","https://openalex.org/W4402703065","https://openalex.org/W4402753917","https://openalex.org/W4402775760","https://openalex.org/W4404526017","https://openalex.org/W4412588075","https://openalex.org/W4413146321"],"related_works":[],"abstract_inverted_index":{"In":[0],"this":[1],"paper,":[2],"we":[3,90],"propose":[4,91],"VideoFrom3D,":[5],"a":[6,18,22,47,54,92,111,117,154],"novel":[7],"framework":[8,94,108],"for":[9,71],"synthesizing":[10,46],"high-quality":[11],"3D":[12,29,175],"scene":[13,176,196],"videos":[14,197],"from":[15,49],"coarse":[16,50],"geometry,":[17],"camera":[19,161],"trajectory,":[20],"and":[21,37,84,102,116,163,178,200,205],"reference":[23],"image.":[24],"Our":[25],"approach":[26,44],"streamlines":[27],"the":[28,76,97],"graphic":[30],"design":[31,35],"workflow,":[32],"enabling":[33],"flexible":[34],"exploration":[36],"rapid":[38],"production":[39],"of":[40,78,100,110,174],"deliverables.":[41],"A":[42],"straightforward":[43],"to":[45,67,75,185],"video":[48,55,63,103,155],"geometry":[51],"might":[52],"condition":[53],"diffusion":[56,64,104,135,156],"model":[57],"on":[58,143],"geometric":[59],"structure.":[60],"However,":[61],"existing":[62],"models":[65,177],"struggle":[66],"generate":[68],"high-fidelity":[69],"results":[70],"complex":[72],"scenes":[73],"due":[74],"difficulty":[77],"jointly":[79],"modeling":[80],"visual":[81],"quality,":[82],"motion,":[83],"temporal":[85],"consistency.":[86],"To":[87],"address":[88],"this,":[89],"generative":[93],"that":[95,190],"leverages":[96],"complementary":[98],"strengths":[99],"image":[101,134],"models.":[105],"Specifically,":[106],"our":[107,191],"consists":[109],"Sparse":[112,139],"Anchor-view":[113],"Generation":[114],"(SAG)":[115],"Geometry-guided":[118],"Generative":[119],"Inbetweening":[120],"(GGI)":[121],"module.":[122],"The":[123],"SAG":[124],"module":[125,148],"generates":[126],"high-quality,":[127,194],"cross-view":[128],"consistent":[129],"anchor":[130,145],"views":[131],"using":[132,153],"an":[133],"model,":[136,157],"aided":[137],"by":[138,159],"Appearance-guided":[140],"Sampling.":[141],"Building":[142],"these":[144],"views,":[146],"GGI":[147],"faithfully":[149],"interpolates":[150],"intermediate":[151],"frames":[152],"enhanced":[158],"flow-based":[160],"control":[162],"structural":[164],"guidance.":[165],"Notably,":[166],"both":[167],"modules":[168],"operate":[169],"without":[170],"any":[171],"paired":[172],"dataset":[173],"natural":[179],"images,":[180],"which":[181],"is":[182,209],"extremely":[183],"difficult":[184],"obtain.":[186],"Comprehensive":[187],"experiments":[188],"show":[189],"method":[192],"produces":[193],"style-consistent":[195],"under":[198],"diverse":[199],"challenging":[201],"scenarios,":[202],"outperforming":[203],"simple":[204],"extended":[206],"baselines.":[207],"Code":[208],"available":[210],"at":[211],"github.com/KIMGEONUNG/VideoFrom3D.":[212]},"counts_by_year":[{"year":2025,"cited_by_count":1}],"updated_date":"2025-12-10T02:45:41.426853","created_date":"2025-12-08T00:00:00"}
