{"id":"https://openalex.org/W4417125396","doi":"https://doi.org/10.1145/3757377.3763833","title":"Context as Memory: Scene-Consistent Interactive Long Video Generation with Memory Retrieval","display_name":"Context as Memory: Scene-Consistent Interactive Long Video Generation with Memory Retrieval","publication_year":2025,"publication_date":"2025-12-08","ids":{"openalex":"https://openalex.org/W4417125396","doi":"https://doi.org/10.1145/3757377.3763833"},"language":null,"primary_location":{"id":"doi:10.1145/3757377.3763833","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3757377.3763833","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the SIGGRAPH Asia 2025 Conference Papers","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5112281651","display_name":"Jiwen Yu","orcid":"https://orcid.org/0000-0001-8577-183X"},"institutions":[{"id":"https://openalex.org/I889458895","display_name":"University of Hong Kong","ror":"https://ror.org/02zhqgq86","country_code":"HK","type":"education","lineage":["https://openalex.org/I889458895"]}],"countries":["HK"],"is_corresponding":true,"raw_author_name":"Jiwen Yu","raw_affiliation_strings":["University of Hong Kong, Hong Kong, Hong Kong"],"affiliations":[{"raw_affiliation_string":"University of Hong Kong, Hong Kong, Hong Kong","institution_ids":["https://openalex.org/I889458895"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5104207086","display_name":"Jianhong Bai","orcid":"https://orcid.org/0000-0002-3121-7259"},"institutions":[{"id":"https://openalex.org/I180726961","display_name":"Shenzhen University","ror":"https://ror.org/01vy4gh70","country_code":"CN","type":"education","lineage":["https://openalex.org/I180726961"]},{"id":"https://openalex.org/I76130692","display_name":"Zhejiang University","ror":"https://ror.org/00a2xv884","country_code":"CN","type":"education","lineage":["https://openalex.org/I76130692"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jianhong Bai","raw_affiliation_strings":["Zhejiang University, Shenzhen, China"],"affiliations":[{"raw_affiliation_string":"Zhejiang University, Shenzhen, China","institution_ids":["https://openalex.org/I180726961","https://openalex.org/I76130692"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5113116658","display_name":"Yiran Qin","orcid":"https://orcid.org/0009-0008-4561-0685"},"institutions":[{"id":"https://openalex.org/I4210116924","display_name":"Chinese University of Hong Kong, Shenzhen","ror":"https://ror.org/02d5ks197","country_code":"CN","type":"education","lineage":["https://openalex.org/I177725633","https://openalex.org/I180726961","https://openalex.org/I4210116924"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yiran Qin","raw_affiliation_strings":["Chinese University of Hong Kong, Shenzhen, Shenzhen, China"],"affiliations":[{"raw_affiliation_string":"Chinese University of Hong Kong, Shenzhen, Shenzhen, China","institution_ids":["https://openalex.org/I4210116924"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5054960327","display_name":"Quande Liu","orcid":"https://orcid.org/0000-0002-3921-5960"},"institutions":[{"id":"https://openalex.org/I4401726859","display_name":"Kuaishou (China)","ror":"https://ror.org/0258as409","country_code":null,"type":"company","lineage":["https://openalex.org/I4401726859"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Quande Liu","raw_affiliation_strings":["Kuaishou Technology, Shenzhen, China"],"affiliations":[{"raw_affiliation_string":"Kuaishou Technology, Shenzhen, China","institution_ids":["https://openalex.org/I4401726859"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101538534","display_name":"Xintao Wang","orcid":"https://orcid.org/0000-0001-6585-8604"},"institutions":[{"id":"https://openalex.org/I4401726859","display_name":"Kuaishou (China)","ror":"https://ror.org/0258as409","country_code":null,"type":"company","lineage":["https://openalex.org/I4401726859"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xintao Wang","raw_affiliation_strings":["Kuaishou Technology, Shenzhen, China"],"affiliations":[{"raw_affiliation_string":"Kuaishou Technology, Shenzhen, China","institution_ids":["https://openalex.org/I4401726859"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101928510","display_name":"Pengfei Wan","orcid":"https://orcid.org/0000-0001-7225-565X"},"institutions":[{"id":"https://openalex.org/I4401726859","display_name":"Kuaishou (China)","ror":"https://ror.org/0258as409","country_code":null,"type":"company","lineage":["https://openalex.org/I4401726859"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Pengfei Wan","raw_affiliation_strings":["Kuaishou Technology, Shenzhen, China"],"affiliations":[{"raw_affiliation_string":"Kuaishou Technology, Shenzhen, China","institution_ids":["https://openalex.org/I4401726859"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5102941781","display_name":"Di Zhang","orcid":"https://orcid.org/0009-0006-5475-2728"},"institutions":[{"id":"https://openalex.org/I4401726859","display_name":"Kuaishou (China)","ror":"https://ror.org/0258as409","country_code":null,"type":"company","lineage":["https://openalex.org/I4401726859"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Di Zhang","raw_affiliation_strings":["Kuaishou Technology, Shenzhen, China"],"affiliations":[{"raw_affiliation_string":"Kuaishou Technology, Shenzhen, China","institution_ids":["https://openalex.org/I4401726859"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5005952268","display_name":"Xihui Liu","orcid":"https://orcid.org/0000-0003-1831-9952"},"institutions":[{"id":"https://openalex.org/I889458895","display_name":"University of Hong Kong","ror":"https://ror.org/02zhqgq86","country_code":"HK","type":"education","lineage":["https://openalex.org/I889458895"]}],"countries":["HK"],"is_corresponding":false,"raw_author_name":"Xihui Liu","raw_affiliation_strings":["University of Hong Kong, Hong Kong, Hong Kong"],"affiliations":[{"raw_affiliation_string":"University of Hong Kong, Hong Kong, Hong Kong","institution_ids":["https://openalex.org/I889458895"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":8,"corresponding_author_ids":["https://openalex.org/A5112281651"],"corresponding_institution_ids":["https://openalex.org/I889458895"],"apc_list":null,"apc_paid":null,"fwci":1.2784,"has_fulltext":false,"cited_by_count":1,"citation_normalized_percentile":{"value":0.8631391,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":91,"max":95},"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"11"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10775","display_name":"Generative Adversarial Networks and Image Synthesis","score":0.46650001406669617,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10775","display_name":"Generative Adversarial Networks and Image Synthesis","score":0.46650001406669617,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12290","display_name":"Human Motion and Animation","score":0.09099999815225601,"subfield":{"id":"https://openalex.org/subfields/2207","display_name":"Control and Systems Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10531","display_name":"Advanced Vision and Imaging","score":0.0860000029206276,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/context","display_name":"Context (archaeology)","score":0.6944000124931335},{"id":"https://openalex.org/keywords/overhead","display_name":"Overhead (engineering)","score":0.5964999794960022},{"id":"https://openalex.org/keywords/frame","display_name":"Frame (networking)","score":0.5824000239372253},{"id":"https://openalex.org/keywords/context-dependent-memory","display_name":"Context-dependent memory","score":0.508899986743927},{"id":"https://openalex.org/keywords/overlay","display_name":"Overlay","score":0.4163999855518341},{"id":"https://openalex.org/keywords/interactive-video","display_name":"Interactive video","score":0.4092000126838684},{"id":"https://openalex.org/keywords/simple","display_name":"Simple (philosophy)","score":0.4032000005245209},{"id":"https://openalex.org/keywords/dimension","display_name":"Dimension (graph theory)","score":0.3982999920845032},{"id":"https://openalex.org/keywords/video-retrieval","display_name":"Video retrieval","score":0.3700999915599823}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8482999801635742},{"id":"https://openalex.org/C2779343474","wikidata":"https://www.wikidata.org/wiki/Q3109175","display_name":"Context (archaeology)","level":2,"score":0.6944000124931335},{"id":"https://openalex.org/C2779960059","wikidata":"https://www.wikidata.org/wiki/Q7113681","display_name":"Overhead (engineering)","level":2,"score":0.5964999794960022},{"id":"https://openalex.org/C126042441","wikidata":"https://www.wikidata.org/wiki/Q1324888","display_name":"Frame (networking)","level":2,"score":0.5824000239372253},{"id":"https://openalex.org/C76679254","wikidata":"https://www.wikidata.org/wiki/Q5165163","display_name":"Context-dependent memory","level":4,"score":0.508899986743927},{"id":"https://openalex.org/C136085584","wikidata":"https://www.wikidata.org/wiki/Q910289","display_name":"Overlay","level":2,"score":0.4163999855518341},{"id":"https://openalex.org/C2776566319","wikidata":"https://www.wikidata.org/wiki/Q3495514","display_name":"Interactive video","level":2,"score":0.4092000126838684},{"id":"https://openalex.org/C2780586882","wikidata":"https://www.wikidata.org/wiki/Q7520643","display_name":"Simple (philosophy)","level":2,"score":0.4032000005245209},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4025000035762787},{"id":"https://openalex.org/C33676613","wikidata":"https://www.wikidata.org/wiki/Q13415176","display_name":"Dimension (graph theory)","level":2,"score":0.3982999920845032},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.3970000147819519},{"id":"https://openalex.org/C2983174267","wikidata":"https://www.wikidata.org/wiki/Q3775098","display_name":"Video retrieval","level":2,"score":0.3700999915599823},{"id":"https://openalex.org/C82687282","wikidata":"https://www.wikidata.org/wiki/Q66221","display_name":"Auxiliary memory","level":2,"score":0.34630000591278076},{"id":"https://openalex.org/C2775924081","wikidata":"https://www.wikidata.org/wiki/Q55608371","display_name":"Control (management)","level":2,"score":0.3450999855995178},{"id":"https://openalex.org/C100660578","wikidata":"https://www.wikidata.org/wiki/Q18733","display_name":"Recall","level":2,"score":0.3425000011920929},{"id":"https://openalex.org/C125411270","wikidata":"https://www.wikidata.org/wiki/Q18653","display_name":"Encoding (memory)","level":2,"score":0.3255000114440918},{"id":"https://openalex.org/C183322885","wikidata":"https://www.wikidata.org/wiki/Q17007702","display_name":"Context model","level":3,"score":0.3248000144958496},{"id":"https://openalex.org/C176649486","wikidata":"https://www.wikidata.org/wiki/Q2308807","display_name":"Memory management","level":3,"score":0.32409998774528503},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.31459999084472656},{"id":"https://openalex.org/C49774154","wikidata":"https://www.wikidata.org/wiki/Q131765","display_name":"Multimedia","level":1,"score":0.31349998712539673},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.2912999987602234},{"id":"https://openalex.org/C121684516","wikidata":"https://www.wikidata.org/wiki/Q7600677","display_name":"Computer graphics (images)","level":1,"score":0.2865999937057495},{"id":"https://openalex.org/C53833338","wikidata":"https://www.wikidata.org/wiki/Q1061424","display_name":"Context switch","level":2,"score":0.27790001034736633},{"id":"https://openalex.org/C172849965","wikidata":"https://www.wikidata.org/wiki/Q3148875","display_name":"Reference frame","level":3,"score":0.26969999074935913},{"id":"https://openalex.org/C106030495","wikidata":"https://www.wikidata.org/wiki/Q1797012","display_name":"Video compression picture types","level":4,"score":0.26499998569488525},{"id":"https://openalex.org/C2985957978","wikidata":"https://www.wikidata.org/wiki/Q492","display_name":"Human memory","level":3,"score":0.2644999921321869},{"id":"https://openalex.org/C33925742","wikidata":"https://www.wikidata.org/wiki/Q361698","display_name":"Page","level":2,"score":0.25760000944137573},{"id":"https://openalex.org/C202474056","wikidata":"https://www.wikidata.org/wiki/Q1931635","display_name":"Video tracking","level":3,"score":0.2508000135421753}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3757377.3763833","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3757377.3763833","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the SIGGRAPH Asia 2025 Conference Papers","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":8,"referenced_works":["https://openalex.org/W4388979610","https://openalex.org/W4390872297","https://openalex.org/W4390873054","https://openalex.org/W4393148714","https://openalex.org/W4400573519","https://openalex.org/W4402727155","https://openalex.org/W4407722206","https://openalex.org/W4415798523"],"related_works":[],"abstract_inverted_index":{"Recent":[0],"advances":[1],"in":[2,18,54,135],"interactive":[3,136],"video":[4,20,42,138],"generation":[5,21,139],"have":[6],"shown":[7],"promising":[8],"results,":[9],"yet":[10,48],"existing":[11],"approaches":[12],"struggle":[13],"with":[14],"scene-consistent":[15],"memory":[16,40,133],"capabilities":[17,134],"long":[19,137],"due":[22],"to":[23,67,99,141,146],"limited":[24],"use":[25],"of":[26,88,109,120],"historical":[27,37,91],"context.":[28],"In":[29],"this":[30],"work,":[31],"we":[32,93],"propose":[33,94],"Context-as-Memory,":[34],"which":[35,115],"utilizes":[36],"context":[38,53,64,103],"as":[39],"for":[41],"generation.":[43],"It":[44],"includes":[45],"two":[46],"simple":[47],"effective":[49],"designs:":[50],"(1)":[51],"storing":[52],"frame":[55,72],"format":[56],"without":[57,123],"additional":[58],"post-processing;":[59],"(2)":[60],"conditioning":[61],"by":[62,105],"concatenating":[63],"and":[65],"frames":[66,104,122],"be":[68],"predicted":[69],"along":[70],"the":[71,75,84,95,118],"dimension":[73],"at":[74,159],"input,":[76],"requiring":[77],"no":[78],"external":[79],"control":[80],"modules.":[81],"Furthermore,":[82],"considering":[83],"enormous":[85],"computational":[86],"overhead":[87],"incorporating":[89],"all":[90],"context,":[92],"Memory":[96],"Retrieval":[97],"module":[98],"select":[100],"truly":[101],"relevant":[102],"determining":[106],"FOV":[107],"(Field":[108],"View)":[110],"overlap":[111],"between":[112],"camera":[113],"poses,":[114],"significantly":[116],"reduces":[117],"number":[119],"candidate":[121],"substantial":[124],"information":[125],"loss.":[126],"Experiments":[127],"demonstrate":[128],"that":[129],"Context-as-Memory":[130],"achieves":[131],"superior":[132],"compared":[140],"SOTAs,":[142],"even":[143],"generalizing":[144],"effectively":[145],"open-domain":[147],"scenarios":[148],"not":[149],"seen":[150],"during":[151],"training.":[152],"Our":[153],"project":[154],"page":[155],"are":[156],"publicly":[157],"available":[158],"https://context-as-memory.github.io/.":[160]},"counts_by_year":[{"year":2025,"cited_by_count":1}],"updated_date":"2026-03-27T05:58:40.876381","created_date":"2025-12-08T00:00:00"}
