{"id":"https://openalex.org/W7164822215","doi":"https://doi.org/10.1145/3805622.3810837","title":"Stillness is Redundant: Motion-Aware KV Cache Retrieval for Efficient Video Understanding","display_name":"Stillness is Redundant: Motion-Aware KV Cache Retrieval for Efficient Video Understanding","publication_year":2026,"publication_date":"2026-06-15","ids":{"openalex":"https://openalex.org/W7164822215","doi":"https://doi.org/10.1145/3805622.3810837"},"language":null,"primary_location":{"id":"doi:10.1145/3805622.3810837","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3805622.3810837","pdf_url":null,"source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2026 International Conference on Multimedia Retrieval","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://doi.org/10.1145/3805622.3810837","any_repository_has_fulltext":null},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5025640452","display_name":"J Li","orcid":null},"institutions":[{"id":"https://openalex.org/I3124059619","display_name":"China University of Geosciences","ror":"https://ror.org/04gcegc37","country_code":"CN","type":"education","lineage":["https://openalex.org/I3124059619"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jingru Li","raw_affiliation_strings":["China University of Geosciences(Wuhan), wuhan, China"],"raw_orcid":"https://orcid.org/0009-0001-2994-2873","affiliations":[{"raw_affiliation_string":"China University of Geosciences(Wuhan), wuhan, China","institution_ids":["https://openalex.org/I3124059619"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5138681780","display_name":"Haowen Zheng","orcid":"https://orcid.org/0009-0000-7479-0170"},"institutions":[{"id":"https://openalex.org/I137867983","display_name":"Central University of Finance and Economics","ror":"https://ror.org/008e3hf02","country_code":"CN","type":"education","lineage":["https://openalex.org/I137867983"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Haowen Zheng","raw_affiliation_strings":["Central University of Finance and Economics, beijing, China"],"raw_orcid":"https://orcid.org/0009-0000-7479-0170","affiliations":[{"raw_affiliation_string":"Central University of Finance and Economics, beijing, China","institution_ids":["https://openalex.org/I137867983"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":2,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.93561201,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"1644","last_page":"1652"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9661999940872192,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9661999940872192,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10627","display_name":"Advanced Image and Video Retrieval Techniques","score":0.0066999997943639755,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11307","display_name":"Domain Adaptation and Few-Shot Learning","score":0.00559999980032444,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/cache","display_name":"Cache","score":0.7077999711036682},{"id":"https://openalex.org/keywords/overhead","display_name":"Overhead (engineering)","score":0.6000999808311462},{"id":"https://openalex.org/keywords/key","display_name":"Key (lock)","score":0.5516999959945679},{"id":"https://openalex.org/keywords/speedup","display_name":"Speedup","score":0.5105999708175659},{"id":"https://openalex.org/keywords/inference","display_name":"Inference","score":0.498199999332428},{"id":"https://openalex.org/keywords/data-compression","display_name":"Data compression","score":0.4424000084400177},{"id":"https://openalex.org/keywords/reduction","display_name":"Reduction (mathematics)","score":0.43939998745918274},{"id":"https://openalex.org/keywords/selection","display_name":"Selection (genetic algorithm)","score":0.42669999599456787}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8726999759674072},{"id":"https://openalex.org/C115537543","wikidata":"https://www.wikidata.org/wiki/Q165596","display_name":"Cache","level":2,"score":0.7077999711036682},{"id":"https://openalex.org/C2779960059","wikidata":"https://www.wikidata.org/wiki/Q7113681","display_name":"Overhead (engineering)","level":2,"score":0.6000999808311462},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.5516999959945679},{"id":"https://openalex.org/C68339613","wikidata":"https://www.wikidata.org/wiki/Q1549489","display_name":"Speedup","level":2,"score":0.5105999708175659},{"id":"https://openalex.org/C2776214188","wikidata":"https://www.wikidata.org/wiki/Q408386","display_name":"Inference","level":2,"score":0.498199999332428},{"id":"https://openalex.org/C78548338","wikidata":"https://www.wikidata.org/wiki/Q2493","display_name":"Data compression","level":2,"score":0.4424000084400177},{"id":"https://openalex.org/C111335779","wikidata":"https://www.wikidata.org/wiki/Q3454686","display_name":"Reduction (mathematics)","level":2,"score":0.43939998745918274},{"id":"https://openalex.org/C81917197","wikidata":"https://www.wikidata.org/wiki/Q628760","display_name":"Selection (genetic algorithm)","level":2,"score":0.42669999599456787},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.39309999346733093},{"id":"https://openalex.org/C104114177","wikidata":"https://www.wikidata.org/wiki/Q79782","display_name":"Motion (physics)","level":2,"score":0.3582000136375427},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.3562000095844269},{"id":"https://openalex.org/C79403827","wikidata":"https://www.wikidata.org/wiki/Q3988","display_name":"Real-time computing","level":1,"score":0.3400000035762787},{"id":"https://openalex.org/C116834253","wikidata":"https://www.wikidata.org/wiki/Q2039217","display_name":"Identification (biology)","level":2,"score":0.31459999084472656},{"id":"https://openalex.org/C180016635","wikidata":"https://www.wikidata.org/wiki/Q2712821","display_name":"Compression (physics)","level":2,"score":0.31380000710487366},{"id":"https://openalex.org/C28719098","wikidata":"https://www.wikidata.org/wiki/Q44946","display_name":"Point (geometry)","level":2,"score":0.2992999851703644},{"id":"https://openalex.org/C202474056","wikidata":"https://www.wikidata.org/wiki/Q1931635","display_name":"Video tracking","level":3,"score":0.29330000281333923},{"id":"https://openalex.org/C128840427","wikidata":"https://www.wikidata.org/wiki/Q1302174","display_name":"Motion compensation","level":2,"score":0.28870001435279846},{"id":"https://openalex.org/C125411270","wikidata":"https://www.wikidata.org/wiki/Q18653","display_name":"Encoding (memory)","level":2,"score":0.2782999873161316},{"id":"https://openalex.org/C189783530","wikidata":"https://www.wikidata.org/wiki/Q352090","display_name":"CPU cache","level":3,"score":0.26930001378059387},{"id":"https://openalex.org/C160633673","wikidata":"https://www.wikidata.org/wiki/Q355198","display_name":"Pixel","level":2,"score":0.26829999685287476},{"id":"https://openalex.org/C65483669","wikidata":"https://www.wikidata.org/wiki/Q3536669","display_name":"Video processing","level":2,"score":0.25949999690055847},{"id":"https://openalex.org/C3261483","wikidata":"https://www.wikidata.org/wiki/Q119565","display_name":"Frame rate","level":2,"score":0.2563000023365021},{"id":"https://openalex.org/C126042441","wikidata":"https://www.wikidata.org/wiki/Q1324888","display_name":"Frame (networking)","level":2,"score":0.2513999938964844}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3805622.3810837","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3805622.3810837","pdf_url":null,"source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2026 International Conference on Multimedia Retrieval","raw_type":"proceedings-article"}],"best_oa_location":{"id":"doi:10.1145/3805622.3810837","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3805622.3810837","pdf_url":null,"source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2026 International Conference on Multimedia Retrieval","raw_type":"proceedings-article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":18,"referenced_works":["https://openalex.org/W2140199336","https://openalex.org/W2963526497","https://openalex.org/W4387321091","https://openalex.org/W4389518760","https://openalex.org/W4389519587","https://openalex.org/W4401176373","https://openalex.org/W4402671548","https://openalex.org/W4402713111","https://openalex.org/W4402727142","https://openalex.org/W4403853618","https://openalex.org/W4404782141","https://openalex.org/W4404784276","https://openalex.org/W4412887841","https://openalex.org/W4413158337","https://openalex.org/W4415798413","https://openalex.org/W4417147545","https://openalex.org/W7126222640","https://openalex.org/W7160088235"],"related_works":[],"abstract_inverted_index":{"Large":[0],"Vision-Language":[1],"Models":[2],"(LVLMs)":[3],"have":[4],"achieved":[5],"remarkable":[6],"success":[7],"in":[8,45,137],"video":[9,29,37,109],"understanding":[10],"tasks,":[11],"yet":[12],"they":[13],"suffer":[14],"from":[15,28,75],"substantial":[16],"computational":[17],"overhead":[18],"due":[19],"to":[20,69,101],"the":[21],"large":[22],"number":[23],"of":[24,117,123],"visual":[25,118],"tokens":[26,119],"generated":[27],"frames.":[30],"A":[31],"key":[32],"observation":[33],"is":[34,142],"that":[35,63,94,113],"consecutive":[36],"frames":[38,71],"often":[39],"contain":[40],"significant":[41],"temporal":[42],"redundancy,":[43],"especially":[44],"static":[46,76],"or":[47],"slowly-changing":[48],"scenes.":[49],"In":[50],"this":[51],"paper,":[52],"we":[53],"propose":[54],"MotionKV,":[55],"a":[56,90],"training-free,":[57],"motion-aware":[58],"KV":[59],"cache":[60],"compression":[61],"framework":[62],"selectively":[64],"retains":[65,115],"key-value":[66],"pairs":[67],"corresponding":[68],"high-motion":[70],"while":[72,120],"compressing":[73],"those":[74],"regions.":[77],"Our":[78],"method":[79],"computes":[80],"per-frame":[81],"motion":[82],"scores":[83],"using":[84],"pixel-level":[85],"difference":[86],"analysis,":[87],"then":[88],"applies":[89],"hybrid":[91],"selection":[92],"strategy":[93],"combines":[95],"motion-based":[96],"importance":[97],"with":[98,134],"background-aware":[99],"penalties":[100],"guide":[102],"token-level":[103],"retention.":[104],"Extensive":[105],"experiments":[106],"on":[107,126],"three":[108],"question-answering":[110],"benchmarks":[111],"demonstrate":[112],"MotionKV":[114],"12.5%":[116],"maintaining":[121],"98.8%":[122],"full-cache":[124],"accuracy":[125],"VideoMME":[127],"(Qwen2-VL-7B),":[128],"achieving":[129],"3.1":[130],"\u00d7":[131],"inference":[132],"speedup":[133],"68%":[135],"reduction":[136],"peak":[138],"memory.":[139],"The":[140],"approach":[141],"model-agnostic":[143],"and":[144],"requires":[145],"no":[146],"fine-tuning,":[147],"enabling":[148],"seamless":[149],"integration":[150],"into":[151],"existing":[152],"LVLMs.":[153]},"counts_by_year":[],"updated_date":"2026-06-16T07:37:23.134862","created_date":"2026-06-16T00:00:00"}
