{"id":"https://openalex.org/W4415538328","doi":"https://doi.org/10.1145/3746027.3755307","title":"CITR: Efficient Long Video Understanding Needs Causal Importance","display_name":"CITR: Efficient Long Video Understanding Needs Causal Importance","publication_year":2025,"publication_date":"2025-10-25","ids":{"openalex":"https://openalex.org/W4415538328","doi":"https://doi.org/10.1145/3746027.3755307"},"language":null,"primary_location":{"id":"doi:10.1145/3746027.3755307","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3746027.3755307","pdf_url":null,"source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 33rd ACM International Conference on Multimedia","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://doi.org/10.1145/3746027.3755307","any_repository_has_fulltext":null},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5101845160","display_name":"Ziqi Yuan","orcid":"https://orcid.org/0000-0003-2397-2163"},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Ziqi Yuan","raw_affiliation_strings":["Department of Computer Science and Technology, Tsinghua University, Beijing, China"],"raw_orcid":"https://orcid.org/0000-0003-2397-2163","affiliations":[{"raw_affiliation_string":"Department of Computer Science and Technology, Tsinghua University, Beijing, China","institution_ids":["https://openalex.org/I99065089"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Jun Li","orcid":"https://orcid.org/0009-0001-5473-2042"},"institutions":[{"id":"https://openalex.org/I139759216","display_name":"Beijing University of Posts and Telecommunications","ror":"https://ror.org/04w9fbh59","country_code":"CN","type":"education","lineage":["https://openalex.org/I139759216"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jun Li","raw_affiliation_strings":["Beijing University of Posts and Telecommunications, Beijing, China"],"raw_orcid":"https://orcid.org/0009-0001-5473-2042","affiliations":[{"raw_affiliation_string":"Beijing University of Posts and Telecommunications, Beijing, China","institution_ids":["https://openalex.org/I139759216"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5030047020","display_name":"Yanghao Li","orcid":null},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yanghao Li","raw_affiliation_strings":["Tsinghua University, Beijing, China"],"raw_orcid":"https://orcid.org/0009-0003-7283-7344","affiliations":[{"raw_affiliation_string":"Tsinghua University, Beijing, China","institution_ids":["https://openalex.org/I99065089"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Yuxiang Huang","orcid":"https://orcid.org/0009-0006-9208-9840"},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yuxiang Huang","raw_affiliation_strings":["Tsinghua University, Beijing, China"],"raw_orcid":"https://orcid.org/0009-0006-9208-9840","affiliations":[{"raw_affiliation_string":"Tsinghua University, Beijing, China","institution_ids":["https://openalex.org/I99065089"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Chi Chen","orcid":"https://orcid.org/0009-0001-5254-4080"},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Chi Chen","raw_affiliation_strings":["Tsinghua University, Beijing, China"],"raw_orcid":"https://orcid.org/0009-0001-5254-4080","affiliations":[{"raw_affiliation_string":"Tsinghua University, Beijing, China","institution_ids":["https://openalex.org/I99065089"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5032568661","display_name":"Shuo Wang","orcid":"https://orcid.org/0000-0001-5408-3145"},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Shuo Wang","raw_affiliation_strings":["Tsinghua University, Beijing, China"],"raw_orcid":"https://orcid.org/0000-0001-5408-3145","affiliations":[{"raw_affiliation_string":"Tsinghua University, Beijing, China","institution_ids":["https://openalex.org/I99065089"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5032939748","display_name":"Zhinan Gou","orcid":"https://orcid.org/0000-0002-4421-273X"},"institutions":[{"id":"https://openalex.org/I113992204","display_name":"Hebei University of Economics and Business","ror":"https://ror.org/05j1kc284","country_code":"CN","type":"education","lineage":["https://openalex.org/I113992204"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Zhinan Gou","raw_affiliation_strings":["Hebei University of Economics and Business, Shijiazhuang, China"],"raw_orcid":"https://orcid.org/0000-0002-4421-273X","affiliations":[{"raw_affiliation_string":"Hebei University of Economics and Business, Shijiazhuang, China","institution_ids":["https://openalex.org/I113992204"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":7,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.26009735,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"4068","last_page":"4076"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10812","display_name":"Human Pose and Action Recognition","score":0.9994000196456909,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10812","display_name":"Human Pose and Action Recognition","score":0.9994000196456909,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9980000257492065,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10531","display_name":"Advanced Vision and Imaging","score":0.9968000054359436,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/security-token","display_name":"Security token","score":0.8009999990463257},{"id":"https://openalex.org/keywords/key","display_name":"Key (lock)","score":0.5712000131607056},{"id":"https://openalex.org/keywords/latency","display_name":"Latency (audio)","score":0.5670999884605408},{"id":"https://openalex.org/keywords/redundancy","display_name":"Redundancy (engineering)","score":0.5629000067710876},{"id":"https://openalex.org/keywords/relevance","display_name":"Relevance (law)","score":0.47620001435279846},{"id":"https://openalex.org/keywords/reduction","display_name":"Reduction (mathematics)","score":0.44440001249313354},{"id":"https://openalex.org/keywords/memory-footprint","display_name":"Memory footprint","score":0.36160001158714294},{"id":"https://openalex.org/keywords/data-compression","display_name":"Data compression","score":0.3506999909877777}],"concepts":[{"id":"https://openalex.org/C48145219","wikidata":"https://www.wikidata.org/wiki/Q1335365","display_name":"Security token","level":2,"score":0.8009999990463257},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7739999890327454},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.5712000131607056},{"id":"https://openalex.org/C82876162","wikidata":"https://www.wikidata.org/wiki/Q17096504","display_name":"Latency (audio)","level":2,"score":0.5670999884605408},{"id":"https://openalex.org/C152124472","wikidata":"https://www.wikidata.org/wiki/Q1204361","display_name":"Redundancy (engineering)","level":2,"score":0.5629000067710876},{"id":"https://openalex.org/C158154518","wikidata":"https://www.wikidata.org/wiki/Q7310970","display_name":"Relevance (law)","level":2,"score":0.47620001435279846},{"id":"https://openalex.org/C111335779","wikidata":"https://www.wikidata.org/wiki/Q3454686","display_name":"Reduction (mathematics)","level":2,"score":0.44440001249313354},{"id":"https://openalex.org/C74912251","wikidata":"https://www.wikidata.org/wiki/Q6815727","display_name":"Memory footprint","level":2,"score":0.36160001158714294},{"id":"https://openalex.org/C78548338","wikidata":"https://www.wikidata.org/wiki/Q2493","display_name":"Data compression","level":2,"score":0.3506999909877777},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.3495999872684479},{"id":"https://openalex.org/C2775924081","wikidata":"https://www.wikidata.org/wiki/Q55608371","display_name":"Control (management)","level":2,"score":0.335099995136261},{"id":"https://openalex.org/C79403827","wikidata":"https://www.wikidata.org/wiki/Q3988","display_name":"Real-time computing","level":1,"score":0.33169999718666077},{"id":"https://openalex.org/C158600405","wikidata":"https://www.wikidata.org/wiki/Q5054566","display_name":"Causal inference","level":2,"score":0.3158000111579895},{"id":"https://openalex.org/C36464697","wikidata":"https://www.wikidata.org/wiki/Q451553","display_name":"Visualization","level":2,"score":0.3140999972820282},{"id":"https://openalex.org/C19012869","wikidata":"https://www.wikidata.org/wiki/Q578372","display_name":"Response time","level":2,"score":0.30640000104904175},{"id":"https://openalex.org/C125411270","wikidata":"https://www.wikidata.org/wiki/Q18653","display_name":"Encoding (memory)","level":2,"score":0.2921999990940094},{"id":"https://openalex.org/C94361409","wikidata":"https://www.wikidata.org/wiki/Q7882500","display_name":"Uncertainty reduction theory","level":2,"score":0.29109999537467957},{"id":"https://openalex.org/C107457646","wikidata":"https://www.wikidata.org/wiki/Q207434","display_name":"Human\u2013computer interaction","level":1,"score":0.2822999954223633},{"id":"https://openalex.org/C160086991","wikidata":"https://www.wikidata.org/wiki/Q5939193","display_name":"Human visual system model","level":3,"score":0.27790001034736633},{"id":"https://openalex.org/C120314980","wikidata":"https://www.wikidata.org/wiki/Q180634","display_name":"Distributed computing","level":1,"score":0.267300009727478},{"id":"https://openalex.org/C2776760102","wikidata":"https://www.wikidata.org/wiki/Q5139990","display_name":"Code (set theory)","level":3,"score":0.25440001487731934}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3746027.3755307","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3746027.3755307","pdf_url":null,"source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 33rd ACM International Conference on Multimedia","raw_type":"proceedings-article"}],"best_oa_location":{"id":"doi:10.1145/3746027.3755307","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3746027.3755307","pdf_url":null,"source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 33rd ACM International Conference on Multimedia","raw_type":"proceedings-article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":2,"referenced_works":["https://openalex.org/W3175961224","https://openalex.org/W4402702917"],"related_works":[],"abstract_inverted_index":{"Long":[0],"video":[1,53,100,118],"understanding":[2,119],"is":[3,90],"essential":[4],"for":[5,31,83],"various":[6],"practical":[7,122],"applications":[8],"including":[9],"surveillance":[10],"and":[11,125,133,147,161,170],"film":[12],"analysis.":[13],"While":[14],"recent":[15],"Vision-Language":[16],"Models":[17],"(VLMs)":[18],"have":[19],"advanced":[20],"performance":[21,166],"in":[22,42,61,116,143,153],"this":[23,77],"domain,":[24],"efficiency":[25,70],"remains":[26],"a":[27,87,105,157],"key":[28],"challenge,":[29],"especially":[30],"hour-long":[32],"videos.":[33],"Existing":[34],"methods":[35],"commonly":[36],"reduce":[37,112],"visual":[38,113],"tokens":[39],"via":[40],"compression":[41],"the":[43,62,81],"vision":[44],"encoder,":[45],"but":[46],"token":[47,59,114],"count":[48],"still":[49],"grows":[50],"linearly":[51],"with":[52,162],"length.":[54],"Alternative":[55],"approaches":[56],"apply":[57],"importance-based":[58],"reduction":[60],"language":[63],"model,":[64],"yet":[65],"their":[66],"non-causal":[67],"design":[68],"limits":[69],"gains":[71],"to":[72,111],"offline,":[73],"single-query":[74],"settings.":[75],"In":[76],"work,":[78],"we":[79],"emphasize":[80],"need":[82],"causal":[84],"importance":[85],"estimation-where":[86],"token's":[88],"relevance":[89],"determined":[91],"only":[92],"from":[93],"prior":[94],"context-to":[95],"enable":[96],"efficient,":[97],"real-time":[98],"long":[99,117],"understanding.":[101],"We":[102],"propose":[103],"\u00d8urMethod,":[104],"Causal":[106],"Importance-based":[107],"Token":[108],"Reduction":[109],"framework":[110],"redundancy":[115],"tasks,":[120],"enabling":[121],"memory":[123,159],"control":[124],"enhanced":[126],"computational":[127],"efficiency.":[128],"Experiments":[129],"on":[130],"both":[131],"offline":[132,144],"streaming":[134],"benchmarks":[135],"show":[136],"that":[137],"\u00d8urMethod":[138],"reduces":[139],"latency":[140],"by":[141],"49%":[142],"multi-query":[145],"scenarios":[146],"effectively":[148],"controls":[149],"chunked":[150],"prefilling":[151],"time":[152],"streaming,":[154],"all":[155],"within":[156],"24GB":[158],"footprint":[160],"less":[163],"than":[164],"1%":[165],"drop.":[167],"The":[168],"code":[169],"appendix":[171],"are":[172],"available":[173],"at":[174],"https://github.com/Columbine21/CITR.":[175]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2025-10-25T00:00:00"}
