{"id":"https://openalex.org/W7164849451","doi":"https://doi.org/10.1145/3805622.3810792","title":"Retrieval-Augmented Camera Control for Video Diffusion","display_name":"Retrieval-Augmented Camera Control for Video Diffusion","publication_year":2026,"publication_date":"2026-06-15","ids":{"openalex":"https://openalex.org/W7164849451","doi":"https://doi.org/10.1145/3805622.3810792"},"language":null,"primary_location":{"id":"doi:10.1145/3805622.3810792","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3805622.3810792","pdf_url":null,"source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2026 International Conference on Multimedia Retrieval","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://doi.org/10.1145/3805622.3810792","any_repository_has_fulltext":null},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5138162298","display_name":"Lining Wang","orcid":"https://orcid.org/0009-0001-4290-5858"},"institutions":[{"id":"https://openalex.org/I204983213","display_name":"Harbin Institute of Technology","ror":"https://ror.org/01yqg2h08","country_code":"CN","type":"education","lineage":["https://openalex.org/I204983213"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Lining Wang","raw_affiliation_strings":["Harbin Institute of Technology, Harbin, China"],"raw_orcid":"https://orcid.org/0009-0001-4290-5858","affiliations":[{"raw_affiliation_string":"Harbin Institute of Technology, Harbin, China","institution_ids":["https://openalex.org/I204983213"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5023274785","display_name":"Hongxun Yao","orcid":"https://orcid.org/0000-0003-3298-2574"},"institutions":[{"id":"https://openalex.org/I204983213","display_name":"Harbin Institute of Technology","ror":"https://ror.org/01yqg2h08","country_code":"CN","type":"education","lineage":["https://openalex.org/I204983213"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Hongxun Yao","raw_affiliation_strings":["Harbin Institute of Technology, Harbin, China"],"raw_orcid":"https://orcid.org/0000-0003-3298-2574","affiliations":[{"raw_affiliation_string":"Harbin Institute of Technology, Harbin, China","institution_ids":["https://openalex.org/I204983213"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5138641927","display_name":"Jinyu Zhang","orcid":"https://orcid.org/0009-0000-2553-0436"},"institutions":[{"id":"https://openalex.org/I204983213","display_name":"Harbin Institute of Technology","ror":"https://ror.org/01yqg2h08","country_code":"CN","type":"education","lineage":["https://openalex.org/I204983213"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jinyu Zhang","raw_affiliation_strings":["Harbin Institute of Technology, Harbin, China"],"raw_orcid":"https://orcid.org/0009-0000-2553-0436","affiliations":[{"raw_affiliation_string":"Harbin Institute of Technology, Harbin, China","institution_ids":["https://openalex.org/I204983213"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.93886545,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"1822","last_page":"1831"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10775","display_name":"Generative Adversarial Networks and Image Synthesis","score":0.7099000215530396,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10775","display_name":"Generative Adversarial Networks and Image Synthesis","score":0.7099000215530396,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10627","display_name":"Advanced Image and Video Retrieval Techniques","score":0.04820000007748604,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.04450000077486038,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/inpainting","display_name":"Inpainting","score":0.7807000279426575},{"id":"https://openalex.org/keywords/prior-probability","display_name":"Prior probability","score":0.5468000173568726},{"id":"https://openalex.org/keywords/generative-grammar","display_name":"Generative grammar","score":0.48429998755455017},{"id":"https://openalex.org/keywords/representation","display_name":"Representation (politics)","score":0.46779999136924744},{"id":"https://openalex.org/keywords/fidelity","display_name":"Fidelity","score":0.4487000107765198},{"id":"https://openalex.org/keywords/image-stitching","display_name":"Image stitching","score":0.4318000078201294},{"id":"https://openalex.org/keywords/high-fidelity","display_name":"High fidelity","score":0.4171999990940094},{"id":"https://openalex.org/keywords/construct","display_name":"Construct (python library)","score":0.4018000066280365}],"concepts":[{"id":"https://openalex.org/C11727466","wikidata":"https://www.wikidata.org/wiki/Q1628157","display_name":"Inpainting","level":3,"score":0.7807000279426575},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.70660001039505},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6722999811172485},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.641700029373169},{"id":"https://openalex.org/C177769412","wikidata":"https://www.wikidata.org/wiki/Q278090","display_name":"Prior probability","level":3,"score":0.5468000173568726},{"id":"https://openalex.org/C39890363","wikidata":"https://www.wikidata.org/wiki/Q36108","display_name":"Generative grammar","level":2,"score":0.48429998755455017},{"id":"https://openalex.org/C2776359362","wikidata":"https://www.wikidata.org/wiki/Q2145286","display_name":"Representation (politics)","level":3,"score":0.46779999136924744},{"id":"https://openalex.org/C2776459999","wikidata":"https://www.wikidata.org/wiki/Q2119376","display_name":"Fidelity","level":2,"score":0.4487000107765198},{"id":"https://openalex.org/C29081049","wikidata":"https://www.wikidata.org/wiki/Q1364242","display_name":"Image stitching","level":2,"score":0.4318000078201294},{"id":"https://openalex.org/C113364801","wikidata":"https://www.wikidata.org/wiki/Q26674","display_name":"High fidelity","level":2,"score":0.4171999990940094},{"id":"https://openalex.org/C2780801425","wikidata":"https://www.wikidata.org/wiki/Q5164392","display_name":"Construct (python library)","level":2,"score":0.4018000066280365},{"id":"https://openalex.org/C126042441","wikidata":"https://www.wikidata.org/wiki/Q1324888","display_name":"Frame (networking)","level":2,"score":0.39010000228881836},{"id":"https://openalex.org/C167966045","wikidata":"https://www.wikidata.org/wiki/Q5532625","display_name":"Generative model","level":3,"score":0.34940001368522644},{"id":"https://openalex.org/C59519942","wikidata":"https://www.wikidata.org/wiki/Q650665","display_name":"Drone","level":2,"score":0.3328999876976013},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.31040000915527344},{"id":"https://openalex.org/C165064840","wikidata":"https://www.wikidata.org/wiki/Q1321061","display_name":"Matching (statistics)","level":2,"score":0.2980000078678131},{"id":"https://openalex.org/C89600930","wikidata":"https://www.wikidata.org/wiki/Q1423946","display_name":"Segmentation","level":2,"score":0.2786000072956085},{"id":"https://openalex.org/C121684516","wikidata":"https://www.wikidata.org/wiki/Q7600677","display_name":"Computer graphics (images)","level":1,"score":0.27619999647140503},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.2759000062942505},{"id":"https://openalex.org/C2775924081","wikidata":"https://www.wikidata.org/wiki/Q55608371","display_name":"Control (management)","level":2,"score":0.27090001106262207}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3805622.3810792","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3805622.3810792","pdf_url":null,"source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2026 International Conference on Multimedia Retrieval","raw_type":"proceedings-article"}],"best_oa_location":{"id":"doi:10.1145/3805622.3810792","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3805622.3810792","pdf_url":null,"source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2026 International Conference on Multimedia Retrieval","raw_type":"proceedings-article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":32,"referenced_works":["https://openalex.org/W2738551266","https://openalex.org/W2964288609","https://openalex.org/W3172568571","https://openalex.org/W3215769467","https://openalex.org/W3216476011","https://openalex.org/W4385318467","https://openalex.org/W4386634656","https://openalex.org/W4390872170","https://openalex.org/W4390873054","https://openalex.org/W4390873331","https://openalex.org/W4390874571","https://openalex.org/W4393148049","https://openalex.org/W4393148505","https://openalex.org/W4400573519","https://openalex.org/W4402667896","https://openalex.org/W4402733576","https://openalex.org/W4402753487","https://openalex.org/W4402754045","https://openalex.org/W4402816534","https://openalex.org/W4403842396","https://openalex.org/W4404652882","https://openalex.org/W4413144795","https://openalex.org/W4413145019","https://openalex.org/W4413145446","https://openalex.org/W4413145947","https://openalex.org/W4413146238","https://openalex.org/W4413147574","https://openalex.org/W4413147631","https://openalex.org/W4413155067","https://openalex.org/W4413156478","https://openalex.org/W4417077838","https://openalex.org/W7160176517"],"related_works":[],"abstract_inverted_index":{"Video":[0],"Diffusion":[1],"Models":[2],"(VDMs)":[3],"have":[4],"demonstrated":[5],"unprecedented":[6],"creativity":[7],"and":[8,86,137,157,164],"realism":[9],"in":[10,161],"generating":[11],"visual":[12,162],"content.":[13],"However,":[14],"taming":[15],"these":[16],"models":[17,38,83],"to":[18,21,39,68,93],"strictly":[19],"adhere":[20],"specific":[22],"camera":[23,165],"trajectories":[24],"remains":[25],"a":[26,33,60,95,105,119,128],"persistent":[27],"challenge.":[28],"Existing":[29],"approaches":[30],"predominantly":[31],"follow":[32],"\"Warp-and-Prediction\"":[34],"paradigm,":[35],"which":[36],"forces":[37],"learn":[40],"complex":[41],"geometric":[42],"inpainting":[43,102],"through":[44],"extensive":[45],"fine-tuning.":[46],"This":[47],"often":[48],"compromises":[49],"the":[50,66,100],"model\u2019s":[51],"native":[52],"generative":[53,125],"diversity.":[54],"To":[55],"address":[56],"this,":[57],"we":[58,71],"propose":[59],"novel":[61],"training-free":[62,155],"framework":[63],"that":[64,147],"shifts":[65],"paradigm":[67],"\u201dRetrieve-and-Refine\u201d.":[69],"First,":[70],"introduce":[72],"Retrieval":[73],"Augmented":[74],"Patch":[75],"Inpainting.":[76],"By":[77,122],"leveraging":[78],"hybrid":[79],"priors":[80,126],"from":[81],"foundation":[82],"(e.g.,":[84],"DINOv3":[85],"VGGT),":[87],"this":[88,116,132],"module":[89],"retrieves":[90],"source":[91],"patches":[92],"construct":[94],"trajectory-aligned":[96],"\"Draft":[97],"Video\",":[98],"transforming":[99],"ill-posed":[101],"problem":[103],"into":[104,118,127],"draft":[106,117],"video":[107],"refine":[108],"task.":[109],"Subsequently,":[110],"our":[111,148],"Coupled":[112],"Dual-Path":[113],"Refinement":[114],"elevates":[115],"photorealistic":[120],"sequence.":[121],"dynamically":[123],"injecting":[124],"parallel":[129],"control":[130,166],"branch,":[131],"mechanism":[133],"heals":[134],"stitching":[135],"artifacts":[136],"synthesizes":[138],"coherent":[139],"content":[140],"for":[141],"out-of-view":[142],"regions.":[143],"Extensive":[144],"experiments":[145],"demonstrate":[146],"method":[149],"achieves":[150],"state-of-the-art":[151],"performance,":[152],"surpassing":[153],"both":[154],"baselines":[156],"fully":[158],"optimized":[159],"methods":[160],"fidelity":[163],"accuracy.":[167]},"counts_by_year":[],"updated_date":"2026-06-16T07:37:23.134862","created_date":"2026-06-16T00:00:00"}
