{"id":"https://openalex.org/W4414360088","doi":"https://doi.org/10.24963/ijcai.2025/97","title":"External Memory Matters: Generalizable Object-Action Memory for Retrieval-Augmented Long-Term Video Understanding","display_name":"External Memory Matters: Generalizable Object-Action Memory for Retrieval-Augmented Long-Term Video Understanding","publication_year":2025,"publication_date":"2025-09-01","ids":{"openalex":"https://openalex.org/W4414360088","doi":"https://doi.org/10.24963/ijcai.2025/97"},"language":"en","primary_location":{"id":"doi:10.24963/ijcai.2025/97","is_oa":false,"landing_page_url":"https://doi.org/10.24963/ijcai.2025/97","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the Thirty-Fourth International Joint Conference on Artificial Intelligence","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5058458910","display_name":"Jisheng Dang","orcid":"https://orcid.org/0000-0002-5378-6225"},"institutions":[{"id":"https://openalex.org/I165932596","display_name":"National University of Singapore","ror":"https://ror.org/01tgyzw49","country_code":"SG","type":"education","lineage":["https://openalex.org/I165932596"]},{"id":"https://openalex.org/I76214153","display_name":"Lanzhou University","ror":"https://ror.org/01mkqqe32","country_code":"CN","type":"education","lineage":["https://openalex.org/I76214153"]},{"id":"https://openalex.org/I157773358","display_name":"Sun Yat-sen University","ror":"https://ror.org/0064kty71","country_code":"CN","type":"education","lineage":["https://openalex.org/I157773358"]}],"countries":["CN","SG"],"is_corresponding":true,"raw_author_name":"Jisheng Dang","raw_affiliation_strings":["School of Computer Science and Engineering, Sun Yat-sen University, China","School of Computing, National University of Singapore, Singapore","School of Information Science and Engineering, Lanzhou University, China"],"affiliations":[{"raw_affiliation_string":"School of Computer Science and Engineering, Sun Yat-sen University, China","institution_ids":["https://openalex.org/I157773358"]},{"raw_affiliation_string":"School of Computing, National University of Singapore, Singapore","institution_ids":["https://openalex.org/I165932596"]},{"raw_affiliation_string":"School of Information Science and Engineering, Lanzhou University, China","institution_ids":["https://openalex.org/I76214153"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5034213282","display_name":"Huicheng Zheng","orcid":"https://orcid.org/0000-0002-6729-4176"},"institutions":[{"id":"https://openalex.org/I157773358","display_name":"Sun Yat-sen University","ror":"https://ror.org/0064kty71","country_code":"CN","type":"education","lineage":["https://openalex.org/I157773358"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Huicheng Zheng","raw_affiliation_strings":["School of Computer Science and Engineering, Sun Yat-sen University, China"],"affiliations":[{"raw_affiliation_string":"School of Computer Science and Engineering, Sun Yat-sen University, China","institution_ids":["https://openalex.org/I157773358"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5039707001","display_name":"Xudong Wu","orcid":"https://orcid.org/0000-0002-0752-0282"},"institutions":[{"id":"https://openalex.org/I157773358","display_name":"Sun Yat-sen University","ror":"https://ror.org/0064kty71","country_code":"CN","type":"education","lineage":["https://openalex.org/I157773358"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xudong Wu","raw_affiliation_strings":["School of Electronics and Information Technology, Sun Yat-sen University, China"],"affiliations":[{"raw_affiliation_string":"School of Electronics and Information Technology, Sun Yat-sen University, China","institution_ids":["https://openalex.org/I157773358"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101646000","display_name":"Jian Jiao","orcid":"https://orcid.org/0000-0001-6002-7297"},"institutions":[{"id":"https://openalex.org/I3133134087","display_name":"Lanzhou Jiaotong University","ror":"https://ror.org/03144pv92","country_code":"CN","type":"education","lineage":["https://openalex.org/I3133134087"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jingmei Jiao","raw_affiliation_strings":["School of Electronic Information Engineering, Lanzhou Jiaotong University, China"],"affiliations":[{"raw_affiliation_string":"School of Electronic Information Engineering, Lanzhou Jiaotong University, China","institution_ids":["https://openalex.org/I3133134087"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101160189","display_name":"Bimei Wang","orcid":null},"institutions":[{"id":"https://openalex.org/I34949971","display_name":"University of Jinan","ror":"https://ror.org/02mjz6f26","country_code":"CN","type":"education","lineage":["https://openalex.org/I34949971"]},{"id":"https://openalex.org/I165932596","display_name":"National University of Singapore","ror":"https://ror.org/01tgyzw49","country_code":"SG","type":"education","lineage":["https://openalex.org/I165932596"]},{"id":"https://openalex.org/I159948400","display_name":"Jinan University","ror":"https://ror.org/02xe5ns62","country_code":"CN","type":"education","lineage":["https://openalex.org/I159948400"]}],"countries":["CN","SG"],"is_corresponding":false,"raw_author_name":"Bimei Wang","raw_affiliation_strings":["College of Cyber Security, Jinan University, China","School of Computing, National University of Singapore, Singapore"],"affiliations":[{"raw_affiliation_string":"College of Cyber Security, Jinan University, China","institution_ids":["https://openalex.org/I34949971","https://openalex.org/I159948400"]},{"raw_affiliation_string":"School of Computing, National University of Singapore, Singapore","institution_ids":["https://openalex.org/I165932596"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5089763731","display_name":"Jun Yang","orcid":"https://orcid.org/0000-0001-6403-3408"},"institutions":[{"id":"https://openalex.org/I3133134087","display_name":"Lanzhou Jiaotong University","ror":"https://ror.org/03144pv92","country_code":"CN","type":"education","lineage":["https://openalex.org/I3133134087"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jun Yang","raw_affiliation_strings":["School of Electronic Information Engineering, Lanzhou Jiaotong University, China"],"affiliations":[{"raw_affiliation_string":"School of Electronic Information Engineering, Lanzhou Jiaotong University, China","institution_ids":["https://openalex.org/I3133134087"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100380066","display_name":"Bin Hu","orcid":"https://orcid.org/0000-0003-3514-5413"},"institutions":[{"id":"https://openalex.org/I76214153","display_name":"Lanzhou University","ror":"https://ror.org/01mkqqe32","country_code":"CN","type":"education","lineage":["https://openalex.org/I76214153"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Bin Hu","raw_affiliation_strings":["School of Information Science and Engineering, Lanzhou University, China"],"affiliations":[{"raw_affiliation_string":"School of Information Science and Engineering, Lanzhou University, China","institution_ids":["https://openalex.org/I76214153"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5034685928","display_name":"Jianhuang Lai","orcid":"https://orcid.org/0000-0003-3883-2024"},"institutions":[{"id":"https://openalex.org/I157773358","display_name":"Sun Yat-sen University","ror":"https://ror.org/0064kty71","country_code":"CN","type":"education","lineage":["https://openalex.org/I157773358"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jianhuang Lai","raw_affiliation_strings":["School of Computer Science and Engineering, Sun Yat-sen University, China"],"affiliations":[{"raw_affiliation_string":"School of Computer Science and Engineering, Sun Yat-sen University, China","institution_ids":["https://openalex.org/I157773358"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5089404640","display_name":"Tat\u2010Seng Chua","orcid":"https://orcid.org/0000-0001-6097-7807"},"institutions":[{"id":"https://openalex.org/I165932596","display_name":"National University of Singapore","ror":"https://ror.org/01tgyzw49","country_code":"SG","type":"education","lineage":["https://openalex.org/I165932596"]}],"countries":["SG"],"is_corresponding":false,"raw_author_name":"Tat Seng Chua","raw_affiliation_strings":["School of Computing, National University of Singapore, Singapore"],"affiliations":[{"raw_affiliation_string":"School of Computing, National University of Singapore, Singapore","institution_ids":["https://openalex.org/I165932596"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":9,"corresponding_author_ids":["https://openalex.org/A5058458910"],"corresponding_institution_ids":["https://openalex.org/I157773358","https://openalex.org/I165932596","https://openalex.org/I76214153"],"apc_list":null,"apc_paid":null,"fwci":4.1567,"has_fulltext":false,"cited_by_count":3,"citation_normalized_percentile":{"value":0.94700397,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":99,"max":100},"biblio":{"volume":null,"issue":null,"first_page":"864","last_page":"872"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9984999895095825,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9984999895095825,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10812","display_name":"Human Pose and Action Recognition","score":0.9965999722480774,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10627","display_name":"Advanced Image and Video Retrieval Techniques","score":0.9934999942779541,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/benchmark","display_name":"Benchmark (surveying)","score":0.7505000233650208},{"id":"https://openalex.org/keywords/adaptability","display_name":"Adaptability","score":0.5968999862670898},{"id":"https://openalex.org/keywords/object","display_name":"Object (grammar)","score":0.5072000026702881},{"id":"https://openalex.org/keywords/variety","display_name":"Variety (cybernetics)","score":0.37290000915527344},{"id":"https://openalex.org/keywords/auxiliary-memory","display_name":"Auxiliary memory","score":0.3677000105381012},{"id":"https://openalex.org/keywords/dynamics","display_name":"Dynamics (music)","score":0.33340001106262207},{"id":"https://openalex.org/keywords/data-modeling","display_name":"Data modeling","score":0.31130000948905945}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8686000108718872},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.7505000233650208},{"id":"https://openalex.org/C177606310","wikidata":"https://www.wikidata.org/wiki/Q5674297","display_name":"Adaptability","level":2,"score":0.5968999862670898},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5116999745368958},{"id":"https://openalex.org/C2781238097","wikidata":"https://www.wikidata.org/wiki/Q175026","display_name":"Object (grammar)","level":2,"score":0.5072000026702881},{"id":"https://openalex.org/C107457646","wikidata":"https://www.wikidata.org/wiki/Q207434","display_name":"Human\u2013computer interaction","level":1,"score":0.3896999955177307},{"id":"https://openalex.org/C136197465","wikidata":"https://www.wikidata.org/wiki/Q1729295","display_name":"Variety (cybernetics)","level":2,"score":0.37290000915527344},{"id":"https://openalex.org/C82687282","wikidata":"https://www.wikidata.org/wiki/Q66221","display_name":"Auxiliary memory","level":2,"score":0.3677000105381012},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.35910001397132874},{"id":"https://openalex.org/C145912823","wikidata":"https://www.wikidata.org/wiki/Q113558","display_name":"Dynamics (music)","level":2,"score":0.33340001106262207},{"id":"https://openalex.org/C67186912","wikidata":"https://www.wikidata.org/wiki/Q367664","display_name":"Data modeling","level":2,"score":0.31130000948905945},{"id":"https://openalex.org/C32833848","wikidata":"https://www.wikidata.org/wiki/Q4115054","display_name":"Extensibility","level":2,"score":0.303600013256073},{"id":"https://openalex.org/C36464697","wikidata":"https://www.wikidata.org/wiki/Q451553","display_name":"Visualization","level":2,"score":0.30329999327659607},{"id":"https://openalex.org/C184337299","wikidata":"https://www.wikidata.org/wiki/Q1437428","display_name":"Semantics (computer science)","level":2,"score":0.2815999984741211},{"id":"https://openalex.org/C137293760","wikidata":"https://www.wikidata.org/wiki/Q3621696","display_name":"Language model","level":2,"score":0.2791000008583069},{"id":"https://openalex.org/C19012869","wikidata":"https://www.wikidata.org/wiki/Q578372","display_name":"Response time","level":2,"score":0.26420000195503235},{"id":"https://openalex.org/C125411270","wikidata":"https://www.wikidata.org/wiki/Q18653","display_name":"Encoding (memory)","level":2,"score":0.25270000100135803}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.24963/ijcai.2025/97","is_oa":false,"landing_page_url":"https://doi.org/10.24963/ijcai.2025/97","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the Thirty-Fourth International Joint Conference on Artificial Intelligence","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Long":[0],"video":[1,149],"understanding":[2,44,150],"with":[3,81],"Large":[4],"Language":[5],"Models":[6],"(LLMs)":[7],"enables":[8],"the":[9,19,29,46,68,121],"description":[10],"of":[11,31,37,45,70],"objects":[12,27,38],"that":[13,153],"are":[14],"not":[15],"explicitly":[16],"present":[17],"in":[18,25],"training":[20],"data.":[21],"However,":[22],"continuous":[23],"changes":[24],"known":[26],"and":[28,39,87,97,108,131,159],"emergence":[30],"new":[32],"ones":[33],"require":[34],"up-to-date":[35],"knowledge":[36,65,117],"their":[40],"dynamics":[41],"for":[42],"effective":[43,116],"open":[47,122],"world.":[48,123],"To":[49],"alleviate":[50],"this,":[51],"we":[52],"propose":[53],"an":[54,76],"efficient":[55],"Retrieval-Enhanced":[56],"Video":[57],"Understanding":[58],"method,":[59],"dubbed":[60],"REVU,":[61],"which":[62],"leverages":[63],"external":[64,78,106],"to":[66,91,119,137],"enhance":[67],"performance":[69,158],"open-world":[71],"learning.":[72],"First,":[73],"REVU":[74,101],"introduces":[75],"extensible":[77],"text-object":[79],"memory":[80],"minimal":[82],"text-visual":[83],"mapping,":[84],"involving":[85],"static":[86],"dynamic":[88],"multimodal":[89],"information":[90,104],"help":[92],"LLMs-based":[93],"models":[94],"align":[95],"text":[96],"vision":[98],"features.":[99],"Second,":[100],"retrieves":[102],"object":[103],"from":[105,113],"databases":[107],"dynamically":[109],"integrates":[110],"frame-specific":[111],"data":[112,139],"videos,":[114],"enabling":[115],"aggregation":[118],"comprehend":[120],"We":[124],"conducted":[125],"experiments":[126],"on":[127,147],"multiple":[128],"benchmark":[129,148],"datasets,":[130],"our":[132,154],"model":[133,155],"demonstrates":[134],"strong":[135],"adaptability":[136],"out-of-domain":[138],"without":[140],"requiring":[141],"additional":[142],"fine-tuning":[143],"or":[144],"re-training.":[145],"Experiments":[146],"datasets":[151],"reveal":[152],"achieves":[156],"state-of-the-art":[157],"robust":[160],"generalization.":[161]},"counts_by_year":[{"year":2026,"cited_by_count":3}],"updated_date":"2026-03-12T08:34:05.389933","created_date":"2025-10-10T00:00:00"}
