{"id":"https://openalex.org/W7153093749","doi":"https://doi.org/10.48550/arxiv.2604.07966","title":"Lighting-grounded Video Generation with Renderer-based Agent Reasoning","display_name":"Lighting-grounded Video Generation with Renderer-based Agent Reasoning","publication_year":2026,"publication_date":"2026-04-09","ids":{"openalex":"https://openalex.org/W7153093749","doi":"https://doi.org/10.48550/arxiv.2604.07966"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2604.07966","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.07966","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2604.07966","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5068383153","display_name":"Ziqi Cai","orcid":"https://orcid.org/0000-0002-1912-3161"},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Cai, Ziqi","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5133327289","display_name":"Taoyu Yang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yang, Taoyu","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5133381812","display_name":"Zheng Chang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Chang, Zheng","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5133386651","display_name":"Si Li","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Li, Si","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5133336410","display_name":"Han Jiang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Jiang, Han","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5133360896","display_name":"Shuchen Weng","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Weng, Shuchen","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5133336895","display_name":"Boxin Shi","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Shi, Boxin","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":7,"corresponding_author_ids":["https://openalex.org/A5068383153"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10775","display_name":"Generative Adversarial Networks and Image Synthesis","score":0.7943000197410583,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10775","display_name":"Generative Adversarial Networks and Image Synthesis","score":0.7943000197410583,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12290","display_name":"Human Motion and Animation","score":0.08139999955892563,"subfield":{"id":"https://openalex.org/subfields/2207","display_name":"Control and Systems Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.0203000009059906,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/rendering","display_name":"Rendering (computer graphics)","score":0.6503999829292297},{"id":"https://openalex.org/keywords/view-synthesis","display_name":"View synthesis","score":0.5430999994277954},{"id":"https://openalex.org/keywords/controllability","display_name":"Controllability","score":0.4799000024795532},{"id":"https://openalex.org/keywords/video-tracking","display_name":"Video tracking","score":0.4754999876022339},{"id":"https://openalex.org/keywords/virtual-reality","display_name":"Virtual reality","score":0.41449999809265137},{"id":"https://openalex.org/keywords/trajectory","display_name":"Trajectory","score":0.40459999442100525},{"id":"https://openalex.org/keywords/video-production","display_name":"Video production","score":0.37549999356269836}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8199999928474426},{"id":"https://openalex.org/C205711294","wikidata":"https://www.wikidata.org/wiki/Q176953","display_name":"Rendering (computer graphics)","level":2,"score":0.6503999829292297},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.6389999985694885},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5891000032424927},{"id":"https://openalex.org/C2776449333","wikidata":"https://www.wikidata.org/wiki/Q7928781","display_name":"View synthesis","level":3,"score":0.5430999994277954},{"id":"https://openalex.org/C48209547","wikidata":"https://www.wikidata.org/wiki/Q1331104","display_name":"Controllability","level":2,"score":0.4799000024795532},{"id":"https://openalex.org/C202474056","wikidata":"https://www.wikidata.org/wiki/Q1931635","display_name":"Video tracking","level":3,"score":0.4754999876022339},{"id":"https://openalex.org/C194969405","wikidata":"https://www.wikidata.org/wiki/Q170519","display_name":"Virtual reality","level":2,"score":0.41449999809265137},{"id":"https://openalex.org/C13662910","wikidata":"https://www.wikidata.org/wiki/Q193139","display_name":"Trajectory","level":2,"score":0.40459999442100525},{"id":"https://openalex.org/C166142869","wikidata":"https://www.wikidata.org/wiki/Q60061622","display_name":"Video production","level":2,"score":0.37549999356269836},{"id":"https://openalex.org/C2781238097","wikidata":"https://www.wikidata.org/wiki/Q175026","display_name":"Object (grammar)","level":2,"score":0.36419999599456787},{"id":"https://openalex.org/C2776436953","wikidata":"https://www.wikidata.org/wiki/Q5163215","display_name":"Consistency (knowledge bases)","level":2,"score":0.352400004863739},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.3474000096321106},{"id":"https://openalex.org/C203479927","wikidata":"https://www.wikidata.org/wiki/Q5165939","display_name":"Controller (irrigation)","level":2,"score":0.30790001153945923},{"id":"https://openalex.org/C121684516","wikidata":"https://www.wikidata.org/wiki/Q7600677","display_name":"Computer graphics (images)","level":1,"score":0.3034000098705292},{"id":"https://openalex.org/C44185422","wikidata":"https://www.wikidata.org/wiki/Q6002064","display_name":"Image-based modeling and rendering","level":3,"score":0.2842000126838684},{"id":"https://openalex.org/C177212765","wikidata":"https://www.wikidata.org/wiki/Q627335","display_name":"Workflow","level":2,"score":0.2736000120639801},{"id":"https://openalex.org/C2775924081","wikidata":"https://www.wikidata.org/wiki/Q55608371","display_name":"Control (management)","level":2,"score":0.271699994802475},{"id":"https://openalex.org/C127705205","wikidata":"https://www.wikidata.org/wiki/Q5748245","display_name":"Heuristics","level":2,"score":0.2671999931335449}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2604.07966","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.07966","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2604.07966","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.07966","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Diffusion":[0],"models":[1],"have":[2],"achieved":[3],"remarkable":[4],"progress":[5],"in":[6,36],"video":[7,57,69,124,199],"generation,":[8],"but":[9],"their":[10,34],"controllability":[11],"remains":[12],"a":[13,52,64,78,102,108,113,122,136,160,194],"major":[14],"limitation.":[15],"Key":[16],"scene":[17,45,74,150,161,191],"factors":[18],"such":[19],"as":[20],"layout,":[21,87],"lighting,":[22,88],"and":[23,40,89,112,130,143,182],"camera":[24,90],"trajectory":[25],"are":[26],"often":[27],"entangled":[28],"or":[29],"only":[30],"weakly":[31],"modeled,":[32],"restricting":[33],"applicability":[35],"domains":[37],"like":[38],"filmmaking":[39],"virtual":[41],"production":[42],"where":[43,146],"explicit":[44,72],"control":[46,99,173,189],"is":[47,151],"essential.":[48],"We":[49,106],"present":[50],"LiVER,":[51],"diffusion-based":[53],"framework":[54,66,134],"for":[55,197],"scene-controllable":[56],"generation.":[58,200],"To":[59,154],"achieve":[60],"this,":[61],"we":[62,158],"introduce":[63],"novel":[65],"that":[67,163,177],"conditions":[68],"synthesis":[70,145],"on":[71],"3D":[73,104,149,172],"properties,":[75],"supported":[76],"by":[77,97],"new":[79,195],"large-scale":[80],"dataset":[81],"with":[82],"dense":[83],"annotations":[84],"of":[85,139],"object":[86],"parameters.":[91],"Our":[92,133],"method":[93],"disentangles":[94],"these":[95,119],"properties":[96],"rendering":[98],"signals":[100,120],"from":[101],"unified":[103],"representation.":[105],"propose":[107],"lightweight":[109],"conditioning":[110],"module":[111],"progressive":[114],"training":[115],"strategy":[116],"to":[117],"integrate":[118],"into":[121,169],"foundational":[123],"diffusion":[125],"model,":[126],"ensuring":[127],"stable":[128],"convergence":[129],"high":[131],"fidelity.":[132],"enables":[135],"wide":[137],"range":[138],"applications,":[140],"including":[141],"image-to-video":[142],"video-to-video":[144],"the":[147,170],"underlying":[148],"fully":[152],"editable.":[153],"further":[155],"enhance":[156],"usability,":[157],"develop":[159],"agent":[162],"automatically":[164],"translates":[165],"high-level":[166],"user":[167],"instructions":[168],"required":[171],"signals.":[174],"Experiments":[175],"show":[176],"LiVER":[178],"achieves":[179],"state-of-the-art":[180],"photorealism":[181],"temporal":[183],"consistency":[184],"while":[185],"enabling":[186],"precise,":[187],"disentangled":[188],"over":[190],"factors,":[192],"setting":[193],"standard":[196],"controllable":[198]},"counts_by_year":[],"updated_date":"2026-04-11T06:19:08.300824","created_date":"2026-04-11T00:00:00"}
