{"id":"https://openalex.org/W7138930129","doi":"https://doi.org/10.48550/arxiv.2603.16870","title":"Demystifing Video Reasoning","display_name":"Demystifing Video Reasoning","publication_year":2026,"publication_date":"2026-03-17","ids":{"openalex":"https://openalex.org/W7138930129","doi":"https://doi.org/10.48550/arxiv.2603.16870"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2603.16870","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.16870","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2603.16870","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5034212098","display_name":"Ruisi Wang","orcid":"https://orcid.org/0009-0001-6794-0721"},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Wang, Ruisi","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5054947207","display_name":"Zhongang Cai","orcid":"https://orcid.org/0000-0002-1810-3855"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Cai, Zhongang","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5130159377","display_name":"Fanyi Pu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Pu, Fanyi","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5130089109","display_name":"Junxiang Xu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Xu, Junxiang","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5112551705","display_name":"Wanqi Yin","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yin, Wanqi","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5018851198","display_name":"Maijunxian Wang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wang, Maijunxian","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5129981065","display_name":"Ran Ji","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Ji, Ran","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5130193645","display_name":"Chenyang Gu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Gu, Chenyang","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5129928813","display_name":"Bo Li","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Li, Bo","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5129787010","display_name":"Ziqi Huang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Huang, Ziqi","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5129948031","display_name":"Hokin Deng","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Deng, Hokin","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5129792474","display_name":"Dahua Lin","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Lin, Dahua","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5129917825","display_name":"Ziwei Liu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Liu, Ziwei","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5129862617","display_name":"Lei Yang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yang, Lei","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":14,"corresponding_author_ids":["https://openalex.org/A5034212098"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10775","display_name":"Generative Adversarial Networks and Image Synthesis","score":0.775600016117096,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10775","display_name":"Generative Adversarial Networks and Image Synthesis","score":0.775600016117096,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11883","display_name":"Embodied and Extended Cognition","score":0.02759999968111515,"subfield":{"id":"https://openalex.org/subfields/2805","display_name":"Cognitive Neuroscience"},"field":{"id":"https://openalex.org/fields/28","display_name":"Neuroscience"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},{"id":"https://openalex.org/T12808","display_name":"Ferroelectric and Negative Capacitance Devices","score":0.01360000018030405,"subfield":{"id":"https://openalex.org/subfields/2208","display_name":"Electrical and Electronic Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/process","display_name":"Process (computing)","score":0.46320000290870667},{"id":"https://openalex.org/keywords/perception","display_name":"Perception","score":0.4499000012874603},{"id":"https://openalex.org/keywords/encode","display_name":"ENCODE","score":0.39879998564720154},{"id":"https://openalex.org/keywords/term","display_name":"Term (time)","score":0.390500009059906},{"id":"https://openalex.org/keywords/opportunistic-reasoning","display_name":"Opportunistic reasoning","score":0.3695000112056732},{"id":"https://openalex.org/keywords/video-tracking","display_name":"Video tracking","score":0.33469998836517334},{"id":"https://openalex.org/keywords/qualitative-reasoning","display_name":"Qualitative reasoning","score":0.33399999141693115},{"id":"https://openalex.org/keywords/reasoning-system","display_name":"Reasoning system","score":0.3294000029563904}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7218999862670898},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5386000275611877},{"id":"https://openalex.org/C98045186","wikidata":"https://www.wikidata.org/wiki/Q205663","display_name":"Process (computing)","level":2,"score":0.46320000290870667},{"id":"https://openalex.org/C26760741","wikidata":"https://www.wikidata.org/wiki/Q160402","display_name":"Perception","level":2,"score":0.4499000012874603},{"id":"https://openalex.org/C66746571","wikidata":"https://www.wikidata.org/wiki/Q1134833","display_name":"ENCODE","level":3,"score":0.39879998564720154},{"id":"https://openalex.org/C61797465","wikidata":"https://www.wikidata.org/wiki/Q1188986","display_name":"Term (time)","level":2,"score":0.390500009059906},{"id":"https://openalex.org/C86827895","wikidata":"https://www.wikidata.org/wiki/Q7098582","display_name":"Opportunistic reasoning","level":4,"score":0.3695000112056732},{"id":"https://openalex.org/C202474056","wikidata":"https://www.wikidata.org/wiki/Q1931635","display_name":"Video tracking","level":3,"score":0.33469998836517334},{"id":"https://openalex.org/C83725634","wikidata":"https://www.wikidata.org/wiki/Q7268699","display_name":"Qualitative reasoning","level":2,"score":0.33399999141693115},{"id":"https://openalex.org/C89288958","wikidata":"https://www.wikidata.org/wiki/Q7301504","display_name":"Reasoning system","level":2,"score":0.3294000029563904},{"id":"https://openalex.org/C12713177","wikidata":"https://www.wikidata.org/wiki/Q1900281","display_name":"Perspective (graphical)","level":2,"score":0.3149000108242035},{"id":"https://openalex.org/C37335422","wikidata":"https://www.wikidata.org/wiki/Q6888134","display_name":"Model-based reasoning","level":3,"score":0.31470000743865967},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.304500013589859},{"id":"https://openalex.org/C103057564","wikidata":"https://www.wikidata.org/wiki/Q4751139","display_name":"Analytic reasoning","level":3,"score":0.2937999963760376},{"id":"https://openalex.org/C125411270","wikidata":"https://www.wikidata.org/wiki/Q18653","display_name":"Encoding (memory)","level":2,"score":0.29280000925064087},{"id":"https://openalex.org/C2780586882","wikidata":"https://www.wikidata.org/wiki/Q7520643","display_name":"Simple (philosophy)","level":2,"score":0.2881999909877777},{"id":"https://openalex.org/C184337299","wikidata":"https://www.wikidata.org/wiki/Q1437428","display_name":"Semantics (computer science)","level":2,"score":0.28299999237060547},{"id":"https://openalex.org/C107457646","wikidata":"https://www.wikidata.org/wiki/Q207434","display_name":"Human\u2013computer interaction","level":1,"score":0.2736999988555908},{"id":"https://openalex.org/C36964233","wikidata":"https://www.wikidata.org/wiki/Q7920942","display_name":"Verbal reasoning","level":3,"score":0.2624000012874603},{"id":"https://openalex.org/C188147891","wikidata":"https://www.wikidata.org/wiki/Q147638","display_name":"Cognitive science","level":1,"score":0.2619999945163727},{"id":"https://openalex.org/C183521366","wikidata":"https://www.wikidata.org/wiki/Q7256422","display_name":"Psychology of reasoning","level":4,"score":0.2612999975681305},{"id":"https://openalex.org/C115086926","wikidata":"https://www.wikidata.org/wiki/Q17004651","display_name":"Causal reasoning","level":3,"score":0.259799987077713},{"id":"https://openalex.org/C139807058","wikidata":"https://www.wikidata.org/wiki/Q352374","display_name":"Adaptation (eye)","level":2,"score":0.2567000091075897}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2603.16870","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.16870","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2603.16870","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.16870","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Recent":[0],"advances":[1],"in":[2,53,79,214,225],"video":[3,11,34,54,215,233],"generation":[4,216],"have":[5],"revealed":[6],"an":[7],"unexpected":[8],"phenomenon:":[9],"diffusion-based":[10],"models":[12,55,74,198,234],"exhibit":[13],"non-trivial":[14],"reasoning":[15,27,52,104,188,212,230],"capabilities.":[16],"Prior":[17],"work":[18,205],"attributes":[19],"this":[20,37,41,97],"to":[21,30,86,107,221],"a":[22,45,87,90,144,179,184,207,219,236],"Chain-of-Frames":[23],"(CoF)":[24],"mechanism,":[25,99],"where":[26,131,156],"is":[28],"assumed":[29],"unfold":[31],"sequentially":[32],"across":[33],"frames.":[35],"In":[36],"work,":[38],"we":[39,71,92,100,147,177],"challenge":[40],"assumption":[42],"and":[43,67,83,118,126,137,167],"uncover":[44,149],"fundamentally":[46],"different":[47,200],"mechanism.":[48],"We":[49],"show":[50],"that":[51,73],"instead":[56],"primarily":[57],"emerges":[58,213],"along":[59],"the":[60,228],"diffusion":[61,145],"denoising":[62,81],"steps.":[63],"Through":[64],"qualitative":[65],"analysis":[66],"targeted":[68],"probing":[69],"experiments,":[70],"find":[72],"explore":[75],"multiple":[76],"candidate":[77],"solutions":[78],"early":[80,132,157],"steps":[82,133,139],"progressively":[84],"converge":[85],"final":[88],"answer,":[89],"process":[91],"term":[93],"Chain-of-Steps":[94],"(CoS).":[95],"Beyond":[96],"core":[98],"identify":[101],"several":[102],"emergent":[103],"behaviors":[105],"critical":[106],"model":[108],"performance:":[109],"(1)":[110],"working":[111],"memory,":[112],"enabling":[113],"persistent":[114],"reference;":[115],"(2)":[116],"self-correction":[117],"enhancement,":[119],"allowing":[120],"recovery":[121],"from":[122,196],"incorrect":[123],"intermediate":[124],"solutions;":[125],"(3)":[127],"perception":[128],"before":[129],"action,":[130],"establish":[134],"semantic":[135],"grounding":[136],"later":[138,168],"perform":[140],"structured":[141],"manipulation.":[142],"During":[143],"step,":[146],"further":[148],"self-evolved":[150],"functional":[151],"specialization":[152],"within":[153],"Diffusion":[154],"Transformers,":[155],"layers":[158,164,169],"encode":[159],"dense":[160],"perceptual":[161],"structure,":[162],"middle":[163],"execute":[165],"reasoning,":[166],"consolidate":[170],"latent":[171,194],"representations.":[172],"Motivated":[173],"by":[174,192],"these":[175],"insights,":[176],"present":[178],"simple":[180],"training-free":[181],"strategy":[182],"as":[183,235],"proof-of-concept,":[185],"demonstrating":[186],"how":[187,211],"can":[189],"be":[190],"improved":[191],"ensembling":[193],"trajectories":[195],"identical":[197],"with":[199],"random":[201],"seeds.":[202],"Overall,":[203],"our":[204],"provides":[206],"systematic":[208],"understanding":[209],"of":[210,232],"models,":[217],"offering":[218],"foundation":[220],"guide":[222],"future":[223],"research":[224],"better":[226],"exploiting":[227],"inherent":[229],"dynamics":[231],"new":[237],"substrate":[238],"for":[239],"intelligence.":[240]},"counts_by_year":[],"updated_date":"2026-03-20T20:54:20.808490","created_date":"2026-03-20T00:00:00"}
