{"id":"https://openalex.org/W7157017701","doi":"https://doi.org/10.48550/arxiv.2604.24317","title":"Don't Pause! Every prediction matters in a streaming video","display_name":"Don't Pause! Every prediction matters in a streaming video","publication_year":2026,"publication_date":"2026-04-27","ids":{"openalex":"https://openalex.org/W7157017701","doi":"https://doi.org/10.48550/arxiv.2604.24317"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2604.24317","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.24317","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2604.24317","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5055761154","display_name":"Dibyadip Chatterjee","orcid":"https://orcid.org/0000-0002-2651-3045"},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Chatterjee, Dibyadip","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5103856431","display_name":"Zhanzhong Pang","orcid":"https://orcid.org/0009-0008-8320-1727"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Pang, Zhanzhong","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5134777915","display_name":"Fadime Sener","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Sener, Fadime","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100566791","display_name":"Yale Song","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Song, Yale","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5134794151","display_name":"Angela Yao","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yao, Angela","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5055761154"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11165","display_name":"Image and Video Quality Assessment","score":0.31279999017715454,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11165","display_name":"Image and Video Quality Assessment","score":0.31279999017715454,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10812","display_name":"Human Pose and Action Recognition","score":0.1979999989271164,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10775","display_name":"Generative Adversarial Networks and Image Synthesis","score":0.09380000084638596,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/streaming-algorithm","display_name":"Streaming algorithm","score":0.6517000198364258},{"id":"https://openalex.org/keywords/event","display_name":"Event (particle physics)","score":0.6334999799728394},{"id":"https://openalex.org/keywords/metric","display_name":"Metric (unit)","score":0.5968999862670898},{"id":"https://openalex.org/keywords/spamming","display_name":"Spamming","score":0.583899974822998},{"id":"https://openalex.org/keywords/benchmark","display_name":"Benchmark (surveying)","score":0.5544999837875366},{"id":"https://openalex.org/keywords/streaming-data","display_name":"Streaming data","score":0.5264000296592712},{"id":"https://openalex.org/keywords/moment","display_name":"Moment (physics)","score":0.4964999854564667},{"id":"https://openalex.org/keywords/adaptation","display_name":"Adaptation (eye)","score":0.44620001316070557},{"id":"https://openalex.org/keywords/pipeline","display_name":"Pipeline (software)","score":0.42820000648498535},{"id":"https://openalex.org/keywords/video-streaming","display_name":"Video streaming","score":0.42590001225471497}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8065000176429749},{"id":"https://openalex.org/C187166803","wikidata":"https://www.wikidata.org/wiki/Q2835831","display_name":"Streaming algorithm","level":3,"score":0.6517000198364258},{"id":"https://openalex.org/C2779662365","wikidata":"https://www.wikidata.org/wiki/Q5416694","display_name":"Event (particle physics)","level":2,"score":0.6334999799728394},{"id":"https://openalex.org/C176217482","wikidata":"https://www.wikidata.org/wiki/Q860554","display_name":"Metric (unit)","level":2,"score":0.5968999862670898},{"id":"https://openalex.org/C158955206","wikidata":"https://www.wikidata.org/wiki/Q83058","display_name":"Spamming","level":3,"score":0.583899974822998},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.5544999837875366},{"id":"https://openalex.org/C2777611316","wikidata":"https://www.wikidata.org/wiki/Q39045282","display_name":"Streaming data","level":2,"score":0.5264000296592712},{"id":"https://openalex.org/C179254644","wikidata":"https://www.wikidata.org/wiki/Q13222844","display_name":"Moment (physics)","level":2,"score":0.4964999854564667},{"id":"https://openalex.org/C139807058","wikidata":"https://www.wikidata.org/wiki/Q352374","display_name":"Adaptation (eye)","level":2,"score":0.44620001316070557},{"id":"https://openalex.org/C43521106","wikidata":"https://www.wikidata.org/wiki/Q2165493","display_name":"Pipeline (software)","level":2,"score":0.42820000648498535},{"id":"https://openalex.org/C79403827","wikidata":"https://www.wikidata.org/wiki/Q3988","display_name":"Real-time computing","level":1,"score":0.427700012922287},{"id":"https://openalex.org/C2986160907","wikidata":"https://www.wikidata.org/wiki/Q220499","display_name":"Video streaming","level":2,"score":0.42590001225471497},{"id":"https://openalex.org/C2779019669","wikidata":"https://www.wikidata.org/wiki/Q25203946","display_name":"Asynchrony (computer programming)","level":3,"score":0.38429999351501465},{"id":"https://openalex.org/C110738861","wikidata":"https://www.wikidata.org/wiki/Q682598","display_name":"Real Time Streaming Protocol","level":3,"score":0.36570000648498535},{"id":"https://openalex.org/C61797465","wikidata":"https://www.wikidata.org/wiki/Q1188986","display_name":"Term (time)","level":2,"score":0.35670000314712524},{"id":"https://openalex.org/C2780385302","wikidata":"https://www.wikidata.org/wiki/Q367158","display_name":"Protocol (science)","level":3,"score":0.3443000018596649},{"id":"https://openalex.org/C12725497","wikidata":"https://www.wikidata.org/wiki/Q810247","display_name":"Baseline (sea)","level":2,"score":0.3416000008583069},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.33739998936653137},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.33329999446868896},{"id":"https://openalex.org/C112313634","wikidata":"https://www.wikidata.org/wiki/Q7886648","display_name":"Complement (music)","level":5,"score":0.32919999957084656},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.31859999895095825},{"id":"https://openalex.org/C26760741","wikidata":"https://www.wikidata.org/wiki/Q160402","display_name":"Perception","level":2,"score":0.3111000061035156},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.3009999990463257},{"id":"https://openalex.org/C196921405","wikidata":"https://www.wikidata.org/wiki/Q786431","display_name":"Online algorithm","level":2,"score":0.2955000102519989},{"id":"https://openalex.org/C2780898871","wikidata":"https://www.wikidata.org/wiki/Q860554","display_name":"Performance metric","level":2,"score":0.2921999990940094},{"id":"https://openalex.org/C2776502983","wikidata":"https://www.wikidata.org/wiki/Q690182","display_name":"Contrast (vision)","level":2,"score":0.28369998931884766},{"id":"https://openalex.org/C2777027219","wikidata":"https://www.wikidata.org/wiki/Q1284190","display_name":"Constant (computer programming)","level":2,"score":0.2768999934196472},{"id":"https://openalex.org/C99844830","wikidata":"https://www.wikidata.org/wiki/Q102441924","display_name":"Scaling","level":2,"score":0.2736999988555908},{"id":"https://openalex.org/C2778562939","wikidata":"https://www.wikidata.org/wiki/Q1298791","display_name":"Synchronization (alternating current)","level":3,"score":0.267300009727478},{"id":"https://openalex.org/C60777511","wikidata":"https://www.wikidata.org/wiki/Q3045002","display_name":"Concept drift","level":3,"score":0.26260000467300415},{"id":"https://openalex.org/C2777851325","wikidata":"https://www.wikidata.org/wiki/Q7094102","display_name":"Online model","level":2,"score":0.2605000138282776},{"id":"https://openalex.org/C2781020372","wikidata":"https://www.wikidata.org/wiki/Q533093","display_name":"On the fly","level":2,"score":0.2599000036716461},{"id":"https://openalex.org/C31258907","wikidata":"https://www.wikidata.org/wiki/Q1301371","display_name":"Computer network","level":1,"score":0.25760000944137573},{"id":"https://openalex.org/C187691185","wikidata":"https://www.wikidata.org/wiki/Q2020720","display_name":"Grid","level":2,"score":0.2554999887943268}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2604.24317","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.24317","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2604.24317","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.24317","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[{"display_name":"Peace, Justice and strong institutions","score":0.4789094030857086,"id":"https://metadata.un.org/sdg/16"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Streaming":[0],"video":[1,27,125],"models":[2,40,103],"should":[3],"respond":[4],"the":[5,12,26,95,123],"moment":[6,13],"an":[7,72],"event":[8,156],"unfolds,":[9],"not":[10,138],"after":[11],"has":[14],"passed.":[15],"Yet":[16],"existing":[17,185],"online":[18],"VideoQA":[19],"benchmarks":[20],"remain":[21],"largely":[22],"retrospective.":[23],"They":[24],"pause":[25],"at":[28,42],"fixed":[29],"timestamps,":[30],"pose":[31],"questions":[32],"about":[33],"current":[34],"or":[35],"past":[36],"events,":[37],"and":[38,67,91,188],"score":[39],"only":[41],"those":[43],"moments.":[44],"This":[45],"protocol":[46],"leaves":[47],"streaming":[48,65,85,124,148,161,186],"predictions":[49,86,109],"untested.":[50],"To":[51],"close":[52],"this":[53],"gap,":[54],"we":[55,130],"introduce":[56],"SPOT-Bench,":[57,183],"featuring":[58],"multi-turn":[59],"proactive":[60],"queries":[61],"that":[62,83,153],"evaluate":[63],"general":[64],"perception":[66,157],"assistive":[68],"capabilities":[69],"required":[70],"by":[71,87,171],"always-on,":[73],"real-time":[74],"assistant.":[75],"SPOT-Bench":[76],"comes":[77],"with":[78],"Timeliness-F1,":[79],"a":[80,146,165,179],"consolidated":[81],"metric":[82],"measures":[84],"their":[88,155,160],"temporal":[89],"precision":[90],"balanced":[92],"coverage":[93],"across":[94],"entire":[96],"video.":[97],"Our":[98],"benchmark":[99],"reveals:":[100],"(i)":[101],"offline":[102,151],"detect":[104],"events":[105],"reliably":[106],"but":[107,117],"spam":[108],"unprompted;":[110],"(ii)":[111],"post-training":[112],"for":[113],"silence":[114],"reduces":[115],"spamming":[116],"induces":[118],"unresponsiveness;":[119],"(iii)":[120],"half":[121],"of":[122,150],"expects":[126],"no":[127],"response,":[128],"which":[129],"term":[131,167],"dead-time":[132],"-":[133],"compute":[134,173],"spent":[135],"here":[136],"does":[137],"affect":[139],"response":[140],"latency.":[141],"These":[142],"findings":[143],"motivate":[144],"AsynKV,":[145],"training-free":[147],"adaptation":[149],"models,":[152,187],"retains":[154],"while":[158],"improving":[159],"behavior.":[162],"AsynKV":[163],"features":[164],"long-short":[166],"memory,":[168],"utilized":[169],"efficiently":[170],"scaling":[172],"during":[174],"dead-time.":[175],"It":[176],"serves":[177],"as":[178],"strong":[180],"baseline":[181],"on":[182,191],"outperforming":[184],"achieves":[189],"state-of-the-art":[190],"retrospective":[192],"benchmarks.":[193]},"counts_by_year":[],"updated_date":"2026-04-29T06:16:36.941037","created_date":"2026-04-29T00:00:00"}
