{"id":"https://openalex.org/W4415540960","doi":"https://doi.org/10.1145/3746027.3754886","title":"Cross-Modal Dual-Causal Learning for Long-Term Action Recognition","display_name":"Cross-Modal Dual-Causal Learning for Long-Term Action Recognition","publication_year":2025,"publication_date":"2025-10-25","ids":{"openalex":"https://openalex.org/W4415540960","doi":"https://doi.org/10.1145/3746027.3754886"},"language":null,"primary_location":{"id":"doi:10.1145/3746027.3754886","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3746027.3754886","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 33rd ACM International Conference on Multimedia","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5047694599","display_name":"Shaowu Xu","orcid":null},"institutions":[{"id":"https://openalex.org/I37796252","display_name":"Beijing University of Technology","ror":"https://ror.org/037b1pp87","country_code":"CN","type":"education","lineage":["https://openalex.org/I37796252"]},{"id":"https://openalex.org/I4210164898","display_name":"Beijing Chaoyang Emergency Medical Center","ror":"https://ror.org/05anb7a53","country_code":"CN","type":"healthcare","lineage":["https://openalex.org/I4210164898"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Shaowu Xu","raw_affiliation_strings":["College of Computer Science, Beijing University of Technology, Chaoyang Qu, Beijing Shi, China"],"affiliations":[{"raw_affiliation_string":"College of Computer Science, Beijing University of Technology, Chaoyang Qu, Beijing Shi, China","institution_ids":["https://openalex.org/I37796252","https://openalex.org/I4210164898"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5068966518","display_name":"Xibin Jia","orcid":"https://orcid.org/0000-0001-8799-8042"},"institutions":[{"id":"https://openalex.org/I37796252","display_name":"Beijing University of Technology","ror":"https://ror.org/037b1pp87","country_code":"CN","type":"education","lineage":["https://openalex.org/I37796252"]},{"id":"https://openalex.org/I4210164898","display_name":"Beijing Chaoyang Emergency Medical Center","ror":"https://ror.org/05anb7a53","country_code":"CN","type":"healthcare","lineage":["https://openalex.org/I4210164898"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xibin Jia","raw_affiliation_strings":["College of Computer Science, Beijing University of Technology, Chaoyang Qu, Beijing Shi, China"],"affiliations":[{"raw_affiliation_string":"College of Computer Science, Beijing University of Technology, Chaoyang Qu, Beijing Shi, China","institution_ids":["https://openalex.org/I37796252","https://openalex.org/I4210164898"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5014526931","display_name":"Junyu Gao","orcid":"https://orcid.org/0000-0002-8105-5497"},"institutions":[{"id":"https://openalex.org/I4210094879","display_name":"Shandong Institute of Automation","ror":"https://ror.org/00qdtba35","country_code":"CN","type":"facility","lineage":["https://openalex.org/I4210094879","https://openalex.org/I4210142748"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Junyu Gao","raw_affiliation_strings":["Institute of Automation, Chinese Academy of Sciences, Haidian Qu, Beijing Shi, China"],"affiliations":[{"raw_affiliation_string":"Institute of Automation, Chinese Academy of Sciences, Haidian Qu, Beijing Shi, China","institution_ids":["https://openalex.org/I4210094879"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5010566612","display_name":"Qianmei Sun","orcid":"https://orcid.org/0000-0002-1928-7772"},"institutions":[{"id":"https://openalex.org/I2800232565","display_name":"Beijing Chao-Yang Hospital, Capital Medical University","ror":"https://ror.org/01eff5662","country_code":"CN","type":"healthcare","lineage":["https://openalex.org/I2800232565","https://openalex.org/I4210110145","https://openalex.org/I4210135108"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Qianmei Sun","raw_affiliation_strings":["Beijing Chao-yang Hospital, Capital Medical University, Chaoyang Qu, Beijing Shi, China"],"affiliations":[{"raw_affiliation_string":"Beijing Chao-yang Hospital, Capital Medical University, Chaoyang Qu, Beijing Shi, China","institution_ids":["https://openalex.org/I2800232565"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5051293900","display_name":"Jing Chang","orcid":"https://orcid.org/0000-0002-2708-3921"},"institutions":[{"id":"https://openalex.org/I2800232565","display_name":"Beijing Chao-Yang Hospital, Capital Medical University","ror":"https://ror.org/01eff5662","country_code":"CN","type":"healthcare","lineage":["https://openalex.org/I2800232565","https://openalex.org/I4210110145","https://openalex.org/I4210135108"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jing Chang","raw_affiliation_strings":["Beijing Chao-yang Hospital, Capital Medical University, Chaoyang Qu, Beijing Shi, China"],"affiliations":[{"raw_affiliation_string":"Beijing Chao-yang Hospital, Capital Medical University, Chaoyang Qu, Beijing Shi, China","institution_ids":["https://openalex.org/I2800232565"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5101013626","display_name":"Chao Fan","orcid":"https://orcid.org/0009-0004-1379-760X"},"institutions":[{"id":"https://openalex.org/I37796252","display_name":"Beijing University of Technology","ror":"https://ror.org/037b1pp87","country_code":"CN","type":"education","lineage":["https://openalex.org/I37796252"]},{"id":"https://openalex.org/I4210164898","display_name":"Beijing Chaoyang Emergency Medical Center","ror":"https://ror.org/05anb7a53","country_code":"CN","type":"healthcare","lineage":["https://openalex.org/I4210164898"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Chao Fan","raw_affiliation_strings":["College of Computer Science, Beijing University of Technology, Chaoyang Qu, Beijing Shi, China"],"affiliations":[{"raw_affiliation_string":"College of Computer Science, Beijing University of Technology, Chaoyang Qu, Beijing Shi, China","institution_ids":["https://openalex.org/I37796252","https://openalex.org/I4210164898"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5047694599"],"corresponding_institution_ids":["https://openalex.org/I37796252","https://openalex.org/I4210164898"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.16361679,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"2919","last_page":"2928"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11512","display_name":"Anomaly Detection Techniques and Applications","score":0.9990000128746033,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11512","display_name":"Anomaly Detection Techniques and Applications","score":0.9990000128746033,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10812","display_name":"Human Pose and Action Recognition","score":0.9983999729156494,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12740","display_name":"Gait Recognition and Analysis","score":0.9714999794960022,"subfield":{"id":"https://openalex.org/subfields/2204","display_name":"Biomedical Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/causal-model","display_name":"Causal model","score":0.6536999940872192},{"id":"https://openalex.org/keywords/action","display_name":"Action (physics)","score":0.5778999924659729},{"id":"https://openalex.org/keywords/modality","display_name":"Modality (human\u2013computer interaction)","score":0.41850000619888306},{"id":"https://openalex.org/keywords/causality","display_name":"Causality (physics)","score":0.4169999957084656},{"id":"https://openalex.org/keywords/intervention","display_name":"Intervention (counseling)","score":0.40700000524520874},{"id":"https://openalex.org/keywords/causal-inference","display_name":"Causal inference","score":0.4009000062942505},{"id":"https://openalex.org/keywords/action-recognition","display_name":"Action recognition","score":0.3995000123977661},{"id":"https://openalex.org/keywords/code","display_name":"Code (set theory)","score":0.3905999958515167}],"concepts":[{"id":"https://openalex.org/C11671645","wikidata":"https://www.wikidata.org/wiki/Q5054567","display_name":"Causal model","level":2,"score":0.6536999940872192},{"id":"https://openalex.org/C2780791683","wikidata":"https://www.wikidata.org/wiki/Q846785","display_name":"Action (physics)","level":2,"score":0.5778999924659729},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.5486000180244446},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.542900025844574},{"id":"https://openalex.org/C180747234","wikidata":"https://www.wikidata.org/wiki/Q23373","display_name":"Cognitive psychology","level":1,"score":0.5023999810218811},{"id":"https://openalex.org/C15744967","wikidata":"https://www.wikidata.org/wiki/Q9418","display_name":"Psychology","level":0,"score":0.4327999949455261},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.4228000044822693},{"id":"https://openalex.org/C2780226545","wikidata":"https://www.wikidata.org/wiki/Q6888030","display_name":"Modality (human\u2013computer interaction)","level":2,"score":0.41850000619888306},{"id":"https://openalex.org/C64357122","wikidata":"https://www.wikidata.org/wiki/Q1149766","display_name":"Causality (physics)","level":2,"score":0.4169999957084656},{"id":"https://openalex.org/C2780665704","wikidata":"https://www.wikidata.org/wiki/Q959298","display_name":"Intervention (counseling)","level":2,"score":0.40700000524520874},{"id":"https://openalex.org/C158600405","wikidata":"https://www.wikidata.org/wiki/Q5054566","display_name":"Causal inference","level":2,"score":0.4009000062942505},{"id":"https://openalex.org/C2987834672","wikidata":"https://www.wikidata.org/wiki/Q4677630","display_name":"Action recognition","level":3,"score":0.3995000123977661},{"id":"https://openalex.org/C2776760102","wikidata":"https://www.wikidata.org/wiki/Q5139990","display_name":"Code (set theory)","level":3,"score":0.3905999958515167},{"id":"https://openalex.org/C27415008","wikidata":"https://www.wikidata.org/wiki/Q7256382","display_name":"Psychological intervention","level":2,"score":0.367000013589859},{"id":"https://openalex.org/C2982736386","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Statistical learning","level":2,"score":0.35749998688697815},{"id":"https://openalex.org/C188198153","wikidata":"https://www.wikidata.org/wiki/Q1613840","display_name":"Limiting","level":2,"score":0.35530000925064087},{"id":"https://openalex.org/C77350462","wikidata":"https://www.wikidata.org/wiki/Q1125472","display_name":"Confounding","level":2,"score":0.3384999930858612},{"id":"https://openalex.org/C163504300","wikidata":"https://www.wikidata.org/wiki/Q2364925","display_name":"Causal structure","level":2,"score":0.33550000190734863},{"id":"https://openalex.org/C115086926","wikidata":"https://www.wikidata.org/wiki/Q17004651","display_name":"Causal reasoning","level":3,"score":0.31520000100135803},{"id":"https://openalex.org/C144745244","wikidata":"https://www.wikidata.org/wiki/Q4927286","display_name":"Blocking (statistics)","level":2,"score":0.2903999984264374},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.2802000045776367},{"id":"https://openalex.org/C2987525970","wikidata":"https://www.wikidata.org/wiki/Q96374569","display_name":"Causal analysis","level":2,"score":0.26919999718666077},{"id":"https://openalex.org/C2779903281","wikidata":"https://www.wikidata.org/wiki/Q6888026","display_name":"Modalities","level":2,"score":0.26829999685287476},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.26669999957084656},{"id":"https://openalex.org/C178253425","wikidata":"https://www.wikidata.org/wiki/Q162668","display_name":"Visual perception","level":3,"score":0.26649999618530273},{"id":"https://openalex.org/C114289077","wikidata":"https://www.wikidata.org/wiki/Q3284399","display_name":"Statistical model","level":2,"score":0.26510000228881836}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3746027.3754886","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3746027.3754886","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 33rd ACM International Conference on Multimedia","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":16,"referenced_works":["https://openalex.org/W2049799298","https://openalex.org/W2337252826","https://openalex.org/W2962858109","https://openalex.org/W2963524571","https://openalex.org/W2981385151","https://openalex.org/W3135588948","https://openalex.org/W3176747907","https://openalex.org/W3177934633","https://openalex.org/W3204924011","https://openalex.org/W4214614183","https://openalex.org/W4220671231","https://openalex.org/W4312232143","https://openalex.org/W4379929708","https://openalex.org/W4386071559","https://openalex.org/W4386526950","https://openalex.org/W4394744429"],"related_works":[],"abstract_inverted_index":{"Long-term":[0],"action":[1,14,109],"recognition":[2],"(LTAR)":[3],"is":[4,134],"challenging":[5],"due":[6],"to":[7,67,111],"extended":[8],"temporal":[9],"spans":[10],"with":[11],"complex":[12],"atomic":[13],"correlations":[15,31],"and":[16,73,87,123],"visual":[17,93,96],"confounders.":[18],"Although":[19],"vision-language":[20],"models":[21],"(VLMs)":[22],"have":[23],"shown":[24],"promise,":[25],"they":[26],"often":[27],"rely":[28],"on":[29,117],"statistical":[30],"instead":[32],"of":[33,128],"causal":[34,46,65,69,85,97],"mechanisms.":[35],"Moreover,":[36],"existing":[37],"causality-based":[38],"methods":[39],"address":[40,112],"modal-specific":[41],"biases":[42,79],"but":[43],"lack":[44],"cross-modal":[45,78],"modeling,":[47],"limiting":[48],"their":[49],"utility":[50],"in":[51,80,91],"VLM-based":[52],"LTAR.":[53],"This":[54],"paper":[55],"proposes":[56],"Cross-Modal":[57],"Dual-Causal":[58],"Learning":[59],"(CMDCL),":[60],"which":[61],"introduces":[62],"a":[63],"structural":[64],"model":[66],"uncover":[68],"relationships":[70],"between":[71],"videos":[72],"label":[74],"texts.":[75],"CMDCL":[76],"addresses":[77],"text":[81],"embeddings":[82],"via":[83],"textual":[84],"intervention":[86,98],"removes":[88],"confounders":[89],"inherent":[90],"the":[92,101,126,129],"modality":[94],"through":[95],"guided":[99],"by":[100],"debiased":[102],"text.":[103],"These":[104],"dual-causal":[105],"interventions":[106],"enable":[107],"robust":[108],"representations":[110],"LTAR":[113],"challenges.":[114],"Experimental":[115],"results":[116],"three":[118],"benchmarks":[119],"including":[120],"Charades,":[121],"Breakfast":[122],"COIN,":[124],"demonstrate":[125],"effectiveness":[127],"proposed":[130],"model.":[131],"Our":[132],"code":[133],"available":[135],"at":[136],"https://github.com/xushaowu/CMDCL.":[137]},"counts_by_year":[],"updated_date":"2026-03-27T05:58:40.876381","created_date":"2025-10-25T00:00:00"}
