{"id":"https://openalex.org/W7147237759","doi":"https://doi.org/10.48550/arxiv.2603.27650","title":"V-CAST: Video Curvature-Aware Spatio-Temporal Pruning for Efficient Video Large Language Models","display_name":"V-CAST: Video Curvature-Aware Spatio-Temporal Pruning for Efficient Video Large Language Models","publication_year":2026,"publication_date":"2026-03-29","ids":{"openalex":"https://openalex.org/W7147237759","doi":"https://doi.org/10.48550/arxiv.2603.27650"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2603.27650","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.27650","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2603.27650","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5100582144","display_name":"Xinying Lin","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Lin, Xinying","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5132647992","display_name":"Xuyang Liu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Liu, Xuyang","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5132604290","display_name":"Yiyu Wang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wang, Yiyu","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5132628681","display_name":"Teng Ma","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Ma, Teng","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5132603695","display_name":"Wenqi Ren","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Ren, Wenqi","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5100582144"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.932699978351593,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.932699978351593,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10775","display_name":"Generative Adversarial Networks and Image Synthesis","score":0.019899999722838402,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11307","display_name":"Domain Adaptation and Few-Shot Learning","score":0.01140000019222498,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/security-token","display_name":"Security token","score":0.7652000188827515},{"id":"https://openalex.org/keywords/pruning","display_name":"Pruning","score":0.6248000264167786},{"id":"https://openalex.org/keywords/inference","display_name":"Inference","score":0.5562000274658203},{"id":"https://openalex.org/keywords/latency","display_name":"Latency (audio)","score":0.4625999927520752},{"id":"https://openalex.org/keywords/selection","display_name":"Selection (genetic algorithm)","score":0.4544999897480011},{"id":"https://openalex.org/keywords/trajectory","display_name":"Trajectory","score":0.45190000534057617},{"id":"https://openalex.org/keywords/key","display_name":"Key (lock)","score":0.4343000054359436},{"id":"https://openalex.org/keywords/event","display_name":"Event (particle physics)","score":0.4291999936103821}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.830299973487854},{"id":"https://openalex.org/C48145219","wikidata":"https://www.wikidata.org/wiki/Q1335365","display_name":"Security token","level":2,"score":0.7652000188827515},{"id":"https://openalex.org/C108010975","wikidata":"https://www.wikidata.org/wiki/Q500094","display_name":"Pruning","level":2,"score":0.6248000264167786},{"id":"https://openalex.org/C2776214188","wikidata":"https://www.wikidata.org/wiki/Q408386","display_name":"Inference","level":2,"score":0.5562000274658203},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.48500001430511475},{"id":"https://openalex.org/C82876162","wikidata":"https://www.wikidata.org/wiki/Q17096504","display_name":"Latency (audio)","level":2,"score":0.4625999927520752},{"id":"https://openalex.org/C81917197","wikidata":"https://www.wikidata.org/wiki/Q628760","display_name":"Selection (genetic algorithm)","level":2,"score":0.4544999897480011},{"id":"https://openalex.org/C13662910","wikidata":"https://www.wikidata.org/wiki/Q193139","display_name":"Trajectory","level":2,"score":0.45190000534057617},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.4343000054359436},{"id":"https://openalex.org/C2779662365","wikidata":"https://www.wikidata.org/wiki/Q5416694","display_name":"Event (particle physics)","level":2,"score":0.4291999936103821},{"id":"https://openalex.org/C78548338","wikidata":"https://www.wikidata.org/wiki/Q2493","display_name":"Data compression","level":2,"score":0.41440001130104065},{"id":"https://openalex.org/C137293760","wikidata":"https://www.wikidata.org/wiki/Q3621696","display_name":"Language model","level":2,"score":0.40389999747276306},{"id":"https://openalex.org/C79403827","wikidata":"https://www.wikidata.org/wiki/Q3988","display_name":"Real-time computing","level":1,"score":0.34709998965263367},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.34360000491142273},{"id":"https://openalex.org/C184337299","wikidata":"https://www.wikidata.org/wiki/Q1437428","display_name":"Semantics (computer science)","level":2,"score":0.3156999945640564},{"id":"https://openalex.org/C125411270","wikidata":"https://www.wikidata.org/wiki/Q18653","display_name":"Encoding (memory)","level":2,"score":0.30979999899864197},{"id":"https://openalex.org/C127532173","wikidata":"https://www.wikidata.org/wiki/Q179904","display_name":"Hourglass","level":2,"score":0.30559998750686646},{"id":"https://openalex.org/C202474056","wikidata":"https://www.wikidata.org/wiki/Q1931635","display_name":"Video tracking","level":3,"score":0.2928999960422516},{"id":"https://openalex.org/C36464697","wikidata":"https://www.wikidata.org/wiki/Q451553","display_name":"Visualization","level":2,"score":0.2896000146865845},{"id":"https://openalex.org/C2779960059","wikidata":"https://www.wikidata.org/wiki/Q7113681","display_name":"Overhead (engineering)","level":2,"score":0.28769999742507935},{"id":"https://openalex.org/C28719098","wikidata":"https://www.wikidata.org/wiki/Q44946","display_name":"Point (geometry)","level":2,"score":0.27549999952316284},{"id":"https://openalex.org/C80444323","wikidata":"https://www.wikidata.org/wiki/Q2878974","display_name":"Theoretical computer science","level":1,"score":0.274399995803833},{"id":"https://openalex.org/C180016635","wikidata":"https://www.wikidata.org/wiki/Q2712821","display_name":"Compression (physics)","level":2,"score":0.27390000224113464},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.26440000534057617},{"id":"https://openalex.org/C115067241","wikidata":"https://www.wikidata.org/wiki/Q1639854","display_name":"Token passing","level":3,"score":0.26420000195503235},{"id":"https://openalex.org/C179799912","wikidata":"https://www.wikidata.org/wiki/Q205084","display_name":"Computational complexity theory","level":2,"score":0.26030001044273376},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.2587999999523163}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2603.27650","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.27650","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2603.27650","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.27650","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Video":[0],"large":[1],"language":[2],"models":[3],"(VideoLLMs)":[4],"show":[5],"strong":[6],"capability":[7],"in":[8,22],"video":[9,90],"understanding,":[10],"yet":[11],"long-context":[12,89],"inference":[13],"is":[14],"still":[15],"dominated":[16],"by":[17,170],"massive":[18],"redundant":[19],"visual":[20,130],"tokens":[21,138],"the":[23,163,167],"prefill":[24],"stage.":[25],"We":[26],"revisit":[27],"token":[28,60,94,111],"compression":[29,95],"for":[30,88],"VideoLLMs":[31,151],"under":[32,67],"a":[33,38,83,97,103,122],"tight":[34],"budget":[35],"and":[36,59,101,116,155,174,178,183],"identify":[37],"key":[39],"bottleneck,":[40],"namely":[41],"insufficient":[42],"spatio-temporal":[43,65],"information":[44],"coverage.":[45],"Existing":[46],"methods":[47],"often":[48],"introduce":[49],"discontinuous":[50],"coverage":[51],"through":[52],"coarse":[53],"per-frame":[54,110],"allocation":[55,106],"or":[56],"scene":[57],"segmentation,":[58],"merging":[61],"can":[62],"further":[63,120],"misalign":[64],"coordinates":[66,142],"MRoPE-style":[68],"discrete":[69],"(t,h,w)":[70],"bindings.":[71],"To":[72],"address":[73],"these":[74],"issues,":[75],"we":[76],"propose":[77],"V-CAST":[78,92,159],"(Video":[79],"Curvature-Aware":[80],"Spatio-Temporal":[81],"Pruning),":[82],"training-free,":[84],"plug-and-play":[85],"pruning":[86],"policy":[87],"inference.":[91],"casts":[93],"as":[96],"trajectory":[98],"approximation":[99],"problem":[100],"introduces":[102],"curvature-guided":[104],"temporal":[105],"module":[107],"that":[108,127,158],"routes":[109],"budgets":[112],"to":[113,143,181],"semantic":[114],"turns":[115],"event":[117],"boundaries.":[118],"It":[119],"adopts":[121],"dual-anchor":[123],"spatial":[124],"selection":[125],"mechanism":[126],"preserves":[128],"high-entropy":[129],"evidence":[131],"without":[132],"attention":[133],"intervention,":[134],"while":[135],"keeping":[136],"retained":[137],"at":[139],"their":[140],"original":[141,164],"maintain":[144],"positional":[145],"alignment.":[146],"Extensive":[147],"experiments":[148],"across":[149],"multiple":[150],"of":[152,162,185],"different":[153],"architectures":[154],"scales":[156],"demonstrate":[157],"achieves":[160],"98.6%":[161],"performance,":[165],"outperforms":[166],"second-best":[168],"method":[169],"+1.1%":[171],"on":[172],"average,":[173],"reduces":[175],"peak":[176],"memory":[177],"total":[179],"latency":[180],"86.7%":[182],"86.4%":[184],"vanilla":[186],"Qwen3-VL-8B-Instruct.":[187]},"counts_by_year":[],"updated_date":"2026-04-02T13:53:19.096889","created_date":"2026-04-02T00:00:00"}
