{"id":"https://openalex.org/W7152128867","doi":"https://doi.org/10.48550/arxiv.2604.06036","title":"CodecSight: Leveraging Video Codec Signals for Efficient Streaming VLM Inference","display_name":"CodecSight: Leveraging Video Codec Signals for Efficient Streaming VLM Inference","publication_year":2026,"publication_date":"2026-04-07","ids":{"openalex":"https://openalex.org/W7152128867","doi":"https://doi.org/10.48550/arxiv.2604.06036"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2604.06036","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.06036","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2604.06036","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5133170132","display_name":"Yulin Zou","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zou, Yulin","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5133184261","display_name":"Yan Chen","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Chen, Yan","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5024471641","display_name":"Wenyan Chen","orcid":"https://orcid.org/0000-0001-8949-0816"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Chen, Wenyan","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100644681","display_name":"Jooyoung Park","orcid":"https://orcid.org/0000-0002-6979-9362"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Park, JooYoung","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5133170795","display_name":"Shivaraman Nitin","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Nitin, Shivaraman","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5121208996","display_name":"Luo Tao","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Tao, Luo","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5133180857","display_name":"Francisco Romero","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Romero, Francisco","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5083116172","display_name":"Dmitrii Ustiugov","orcid":"https://orcid.org/0000-0003-3156-010X"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Ustiugov, Dmitrii","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":8,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.1873999983072281,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.1873999983072281,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.09210000187158585,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.07249999791383743,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/analytics","display_name":"Analytics","score":0.5353999733924866},{"id":"https://openalex.org/keywords/metadata","display_name":"Metadata","score":0.4918000102043152},{"id":"https://openalex.org/keywords/codec","display_name":"Codec","score":0.43299999833106995},{"id":"https://openalex.org/keywords/cache","display_name":"Cache","score":0.4083000123500824},{"id":"https://openalex.org/keywords/video-processing","display_name":"Video processing","score":0.39969998598098755},{"id":"https://openalex.org/keywords/video-tracking","display_name":"Video tracking","score":0.37279999256134033},{"id":"https://openalex.org/keywords/redundancy","display_name":"Redundancy (engineering)","score":0.349700003862381},{"id":"https://openalex.org/keywords/overhead","display_name":"Overhead (engineering)","score":0.34610000252723694},{"id":"https://openalex.org/keywords/video-capture","display_name":"Video capture","score":0.3407999873161316}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8626999855041504},{"id":"https://openalex.org/C79403827","wikidata":"https://www.wikidata.org/wiki/Q3988","display_name":"Real-time computing","level":1,"score":0.5509999990463257},{"id":"https://openalex.org/C79158427","wikidata":"https://www.wikidata.org/wiki/Q485396","display_name":"Analytics","level":2,"score":0.5353999733924866},{"id":"https://openalex.org/C93518851","wikidata":"https://www.wikidata.org/wiki/Q180160","display_name":"Metadata","level":2,"score":0.4918000102043152},{"id":"https://openalex.org/C161765866","wikidata":"https://www.wikidata.org/wiki/Q184748","display_name":"Codec","level":2,"score":0.43299999833106995},{"id":"https://openalex.org/C115537543","wikidata":"https://www.wikidata.org/wiki/Q165596","display_name":"Cache","level":2,"score":0.4083000123500824},{"id":"https://openalex.org/C65483669","wikidata":"https://www.wikidata.org/wiki/Q3536669","display_name":"Video processing","level":2,"score":0.39969998598098755},{"id":"https://openalex.org/C202474056","wikidata":"https://www.wikidata.org/wiki/Q1931635","display_name":"Video tracking","level":3,"score":0.37279999256134033},{"id":"https://openalex.org/C152124472","wikidata":"https://www.wikidata.org/wiki/Q1204361","display_name":"Redundancy (engineering)","level":2,"score":0.349700003862381},{"id":"https://openalex.org/C2779960059","wikidata":"https://www.wikidata.org/wiki/Q7113681","display_name":"Overhead (engineering)","level":2,"score":0.34610000252723694},{"id":"https://openalex.org/C151211776","wikidata":"https://www.wikidata.org/wiki/Q2778015","display_name":"Video capture","level":3,"score":0.3407999873161316},{"id":"https://openalex.org/C2776214188","wikidata":"https://www.wikidata.org/wiki/Q408386","display_name":"Inference","level":2,"score":0.33719998598098755},{"id":"https://openalex.org/C48044578","wikidata":"https://www.wikidata.org/wiki/Q727490","display_name":"Scalability","level":2,"score":0.32359999418258667},{"id":"https://openalex.org/C93996380","wikidata":"https://www.wikidata.org/wiki/Q44127","display_name":"Server","level":2,"score":0.3167000114917755},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.3057999908924103},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.30160000920295715},{"id":"https://openalex.org/C147764199","wikidata":"https://www.wikidata.org/wiki/Q6865248","display_name":"Minification","level":2,"score":0.3012000024318695},{"id":"https://openalex.org/C2778476105","wikidata":"https://www.wikidata.org/wiki/Q628539","display_name":"Workload","level":2,"score":0.299699991941452},{"id":"https://openalex.org/C71901391","wikidata":"https://www.wikidata.org/wiki/Q7126699","display_name":"Upload","level":2,"score":0.2851000130176544},{"id":"https://openalex.org/C113954288","wikidata":"https://www.wikidata.org/wiki/Q186885","display_name":"Timestamp","level":2,"score":0.2847999930381775},{"id":"https://openalex.org/C59732488","wikidata":"https://www.wikidata.org/wiki/Q2528440","display_name":"Visual analytics","level":3,"score":0.27880001068115234},{"id":"https://openalex.org/C179518139","wikidata":"https://www.wikidata.org/wiki/Q5140297","display_name":"Coding (social sciences)","level":2,"score":0.2759000062942505},{"id":"https://openalex.org/C551386961","wikidata":"https://www.wikidata.org/wiki/Q22666","display_name":"File sharing","level":3,"score":0.27559998631477356},{"id":"https://openalex.org/C199683683","wikidata":"https://www.wikidata.org/wiki/Q8799","display_name":"Terabyte","level":2,"score":0.2660999894142151},{"id":"https://openalex.org/C23431618","wikidata":"https://www.wikidata.org/wiki/Q1404672","display_name":"Multiview Video Coding","level":4,"score":0.265500009059906},{"id":"https://openalex.org/C187191949","wikidata":"https://www.wikidata.org/wiki/Q1138496","display_name":"Profiling (computer programming)","level":2,"score":0.2619999945163727},{"id":"https://openalex.org/C108803254","wikidata":"https://www.wikidata.org/wiki/Q857512","display_name":"Smacker video","level":4,"score":0.2590999901294708},{"id":"https://openalex.org/C187691185","wikidata":"https://www.wikidata.org/wiki/Q2020720","display_name":"Grid","level":2,"score":0.2565000057220459},{"id":"https://openalex.org/C36464697","wikidata":"https://www.wikidata.org/wiki/Q451553","display_name":"Visualization","level":2,"score":0.25290000438690186}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2604.06036","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.06036","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2604.06036","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.06036","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/8","score":0.40617841482162476,"display_name":"Decent work and economic growth"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Video":[0],"streaming":[1,84],"analytics":[2,86],"is":[3],"a":[4,46,82,90,107,117,187],"crucial":[5],"workload":[6],"for":[7,75],"vision-language":[8],"model":[9],"serving,":[10],"but":[11,34],"the":[12,38,43,98],"high":[13],"cost":[14,24],"of":[15,103,109,139,162,182,189],"multimodal":[16],"inference":[17,23],"limits":[18],"scalability.":[19],"Prior":[20],"systems":[21],"reduce":[22],"by":[25],"exploiting":[26],"temporal":[27,99],"and":[28,66,100,129,153,167,186],"spatial":[29,101],"redundancy":[30],"in":[31,180,193],"video":[32,85,94,125],"streams,":[33],"they":[35],"target":[36],"either":[37,62],"vision":[39],"transformer":[40],"(ViT)":[41],"or":[42,68],"LLM":[44,130,159],"with":[45,132,202],"limited":[47],"view,":[48],"leaving":[49],"end-to-end":[50],"opportunities":[51],"untapped.":[52],"Moreover,":[53],"existing":[54],"methods":[55],"incur":[56],"significant":[57],"overhead":[58],"to":[59,121,184,191],"identify":[60],"redundancy,":[61],"through":[63],"offline":[64,171],"profiling":[65],"training":[67],"costly":[69],"online":[70,166],"computation,":[71],"making":[72],"them":[73],"ill-suited":[74],"dynamic":[76],"real-time":[77],"streams.":[78],"We":[79],"present":[80],"CodecSight,":[81],"codec-guided":[83,147],"system,":[87],"built":[88],"on":[89,142],"key":[91],"observation":[92],"that":[93,175],"codecs":[95],"already":[96],"extract":[97],"structure":[102],"each":[104],"stream":[105],"as":[106,116,135],"byproduct":[108],"compression.":[110],"CodecSight":[111,176],"treats":[112],"this":[113],"codec":[114],"metadata":[115],"low-cost":[118],"runtime":[119],"signal":[120],"unify":[122],"optimization":[123],"across":[124],"decoding,":[126],"visual":[127],"processing,":[128],"prefilling,":[131,160],"transmission":[133],"reduction":[134,188],"an":[136,178],"inherent":[137],"benefit":[138],"operating":[140],"directly":[141],"compressed":[143],"bitstreams.":[144],"This":[145],"drives":[146],"patch":[148],"pruning":[149],"before":[150],"ViT":[151],"encoding":[152],"selective":[154],"key-value":[155],"cache":[156],"refresh":[157],"during":[158],"both":[161],"which":[163],"are":[164],"fully":[165],"do":[168],"not":[169],"require":[170],"training.":[172],"Experiments":[173],"show":[174],"achieves":[177],"improvement":[179],"throughput":[181],"up":[183,190],"3$\\times$,":[185],"87%":[192],"GPU":[194],"compute":[195],"over":[196],"state-of-the-art":[197],"baselines,":[198],"maintaining":[199],"competitive":[200],"accuracy":[201],"only":[203],"0$\\sim$8%":[204],"F1":[205],"drop.":[206]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2026-04-09T00:00:00"}
