{"id":"https://openalex.org/W7143509073","doi":"https://doi.org/10.48550/arxiv.2603.26193","title":"MemCam: Memory-Augmented Camera Control for Consistent Video Generation","display_name":"MemCam: Memory-Augmented Camera Control for Consistent Video Generation","publication_year":2026,"publication_date":"2026-03-27","ids":{"openalex":"https://openalex.org/W7143509073","doi":"https://doi.org/10.48550/arxiv.2603.26193"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2603.26193","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.26193","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2603.26193","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5130928493","display_name":"Xinhang Gao","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Gao, Xinhang","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5130948817","display_name":"Junlin Guan","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Guan, Junlin","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5130938934","display_name":"Shuhan Luo","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Luo, Shuhan","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5130974512","display_name":"Wenzhuo Li","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Li, Wenzhuo","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5130919805","display_name":"Guanghuan Tan","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Tan, Guanghuan","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5130931328","display_name":"Jiacheng Wang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wang, Jiacheng","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5130928493"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10775","display_name":"Generative Adversarial Networks and Image Synthesis","score":0.3677000105381012,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10775","display_name":"Generative Adversarial Networks and Image Synthesis","score":0.3677000105381012,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10531","display_name":"Advanced Vision and Imaging","score":0.28769999742507935,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10481","display_name":"Computer Graphics and Visualization Techniques","score":0.08860000222921371,"subfield":{"id":"https://openalex.org/subfields/1704","display_name":"Computer Graphics and Computer-Aided Design"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/context","display_name":"Context (archaeology)","score":0.5440000295639038},{"id":"https://openalex.org/keywords/video-tracking","display_name":"Video tracking","score":0.5281999707221985},{"id":"https://openalex.org/keywords/view-synthesis","display_name":"View synthesis","score":0.48559999465942383},{"id":"https://openalex.org/keywords/video-compression-picture-types","display_name":"Video compression picture types","score":0.4706000089645386},{"id":"https://openalex.org/keywords/data-compression","display_name":"Data compression","score":0.4625000059604645},{"id":"https://openalex.org/keywords/viewpoints","display_name":"Viewpoints","score":0.4607999920845032},{"id":"https://openalex.org/keywords/overhead","display_name":"Overhead (engineering)","score":0.45509999990463257},{"id":"https://openalex.org/keywords/multiview-video-coding","display_name":"Multiview Video Coding","score":0.4456999897956848},{"id":"https://openalex.org/keywords/video-capture","display_name":"Video capture","score":0.3921999931335449}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8167999982833862},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.6850000023841858},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6419000029563904},{"id":"https://openalex.org/C2779343474","wikidata":"https://www.wikidata.org/wiki/Q3109175","display_name":"Context (archaeology)","level":2,"score":0.5440000295639038},{"id":"https://openalex.org/C202474056","wikidata":"https://www.wikidata.org/wiki/Q1931635","display_name":"Video tracking","level":3,"score":0.5281999707221985},{"id":"https://openalex.org/C2776449333","wikidata":"https://www.wikidata.org/wiki/Q7928781","display_name":"View synthesis","level":3,"score":0.48559999465942383},{"id":"https://openalex.org/C106030495","wikidata":"https://www.wikidata.org/wiki/Q1797012","display_name":"Video compression picture types","level":4,"score":0.4706000089645386},{"id":"https://openalex.org/C78548338","wikidata":"https://www.wikidata.org/wiki/Q2493","display_name":"Data compression","level":2,"score":0.4625000059604645},{"id":"https://openalex.org/C2776035091","wikidata":"https://www.wikidata.org/wiki/Q7928819","display_name":"Viewpoints","level":2,"score":0.4607999920845032},{"id":"https://openalex.org/C2779960059","wikidata":"https://www.wikidata.org/wiki/Q7113681","display_name":"Overhead (engineering)","level":2,"score":0.45509999990463257},{"id":"https://openalex.org/C23431618","wikidata":"https://www.wikidata.org/wiki/Q1404672","display_name":"Multiview Video Coding","level":4,"score":0.4456999897956848},{"id":"https://openalex.org/C151211776","wikidata":"https://www.wikidata.org/wiki/Q2778015","display_name":"Video capture","level":3,"score":0.3921999931335449},{"id":"https://openalex.org/C2775924081","wikidata":"https://www.wikidata.org/wiki/Q55608371","display_name":"Control (management)","level":2,"score":0.36010000109672546},{"id":"https://openalex.org/C2776436953","wikidata":"https://www.wikidata.org/wiki/Q5163215","display_name":"Consistency (knowledge bases)","level":2,"score":0.3555999994277954},{"id":"https://openalex.org/C121684516","wikidata":"https://www.wikidata.org/wiki/Q7600677","display_name":"Computer graphics (images)","level":1,"score":0.35260000824928284},{"id":"https://openalex.org/C167510206","wikidata":"https://www.wikidata.org/wiki/Q2835824","display_name":"Block-matching algorithm","level":4,"score":0.3458000123500824},{"id":"https://openalex.org/C126042441","wikidata":"https://www.wikidata.org/wiki/Q1324888","display_name":"Frame (networking)","level":2,"score":0.3443000018596649},{"id":"https://openalex.org/C65483669","wikidata":"https://www.wikidata.org/wiki/Q3536669","display_name":"Video processing","level":2,"score":0.3237999975681305},{"id":"https://openalex.org/C146044194","wikidata":"https://www.wikidata.org/wiki/Q5157334","display_name":"Computational photography","level":4,"score":0.31520000100135803},{"id":"https://openalex.org/C2776566319","wikidata":"https://www.wikidata.org/wiki/Q3495514","display_name":"Interactive video","level":2,"score":0.3037000000476837},{"id":"https://openalex.org/C81917197","wikidata":"https://www.wikidata.org/wiki/Q628760","display_name":"Selection (genetic algorithm)","level":2,"score":0.2980000078678131},{"id":"https://openalex.org/C128840427","wikidata":"https://www.wikidata.org/wiki/Q1302174","display_name":"Motion compensation","level":2,"score":0.2964000105857849},{"id":"https://openalex.org/C125411270","wikidata":"https://www.wikidata.org/wiki/Q18653","display_name":"Encoding (memory)","level":2,"score":0.2825999855995178},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.28119999170303345},{"id":"https://openalex.org/C166142869","wikidata":"https://www.wikidata.org/wiki/Q60061622","display_name":"Video production","level":2,"score":0.2782000005245209},{"id":"https://openalex.org/C108803254","wikidata":"https://www.wikidata.org/wiki/Q857512","display_name":"Smacker video","level":4,"score":0.2757999897003174},{"id":"https://openalex.org/C30814859","wikidata":"https://www.wikidata.org/wiki/Q4119603","display_name":"Video denoising","level":5,"score":0.2728999853134155},{"id":"https://openalex.org/C183322885","wikidata":"https://www.wikidata.org/wiki/Q17007702","display_name":"Context model","level":3,"score":0.265500009059906},{"id":"https://openalex.org/C2778852477","wikidata":"https://www.wikidata.org/wiki/Q313614","display_name":"Video camera","level":2,"score":0.26080000400543213},{"id":"https://openalex.org/C39394851","wikidata":"https://www.wikidata.org/wiki/Q921594","display_name":"Inter frame","level":4,"score":0.25589999556541443}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2603.26193","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.26193","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2603.26193","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.26193","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Interactive":[0],"video":[1,10,23,44,113,138],"generation":[2,24,45,114],"has":[3],"significant":[4],"potential":[5],"for":[6],"scene":[7,19,68,133],"simulation":[8],"and":[9,55,73,90],"creation.":[11],"However,":[12],"existing":[13,121],"methods":[14,123],"often":[15],"struggle":[16],"with":[17,66,140],"maintaining":[18],"consistency":[20],"during":[21],"long":[22,137],"under":[25],"dynamic":[26],"camera":[27,64,142],"control":[28],"due":[29],"to":[30,61,94],"limited":[31],"contextual":[32,59,108],"information.":[33,109],"To":[34,70],"address":[35],"this":[36],"challenge,":[37],"we":[38,77],"propose":[39],"MemCam,":[40],"a":[41,79],"memory-augmented":[42],"interactive":[43,112],"approach":[46],"that":[47,83,117],"treats":[48],"previously":[49],"generated":[50],"frames":[51,86],"as":[52,58,124,126],"external":[53],"memory":[54,85],"leverages":[56],"them":[57],"conditioning":[60],"achieve":[62],"controllable":[63],"viewpoints":[65],"high":[67],"consistency.":[69],"enable":[71],"longer":[72],"more":[74],"relevant":[75,99],"context,":[76],"design":[78],"context":[80],"compression":[81],"module":[82],"encodes":[84],"into":[87],"compact":[88],"representations":[89],"employs":[91],"co-visibility-based":[92],"selection":[93],"dynamically":[95],"retrieve":[96],"the":[97],"most":[98],"historical":[100],"frames,":[101],"thereby":[102],"reducing":[103],"computational":[104],"overhead":[105],"while":[106],"enriching":[107],"Experiments":[110],"on":[111],"tasks":[115],"show":[116],"MemCam":[118],"significantly":[119],"outperforms":[120],"baseline":[122],"well":[125],"open-source":[127],"state-of-the-art":[128],"approaches":[129],"in":[130,136],"terms":[131],"of":[132],"consistency,":[134],"particularly":[135],"scenarios":[139],"large":[141],"rotations.":[143]},"counts_by_year":[],"updated_date":"2026-03-31T06:07:48.031334","created_date":"2026-03-31T00:00:00"}
