{"id":"https://openalex.org/W7160872185","doi":"https://doi.org/10.48550/arxiv.2605.07568","title":"Tracing the Arrow of Time: Diagnosing Temporal Information Flow in Video-LLMs","display_name":"Tracing the Arrow of Time: Diagnosing Temporal Information Flow in Video-LLMs","publication_year":2026,"publication_date":"2026-05-08","ids":{"openalex":"https://openalex.org/W7160872185","doi":"https://doi.org/10.48550/arxiv.2605.07568"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2605.07568","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2605.07568","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2605.07568","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5135834013","display_name":"Peitao Han","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Han, Peitao","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5135892327","display_name":"Fei Cheng","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Cheng, Fei","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5135838352","display_name":"Lis K. Pereira","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Pereira, Lis K.","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5135837543","display_name":"Qianying Liu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Liu, Qianying","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5013335631","display_name":"Shigeru Kitazawa","orcid":"https://orcid.org/0000-0002-2067-0189"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Kitazawa, Shigeru","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":5,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.47909998893737793,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.47909998893737793,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10775","display_name":"Generative Adversarial Networks and Image Synthesis","score":0.07819999754428864,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T13702","display_name":"Machine Learning in Healthcare","score":0.054999999701976776,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/encode","display_name":"ENCODE","score":0.7366999983787537},{"id":"https://openalex.org/keywords/encoding","display_name":"Encoding (memory)","score":0.669700026512146},{"id":"https://openalex.org/keywords/tracing","display_name":"Tracing","score":0.6557999849319458},{"id":"https://openalex.org/keywords/encoder","display_name":"Encoder","score":0.6477000117301941},{"id":"https://openalex.org/keywords/bottleneck","display_name":"Bottleneck","score":0.6456999778747559},{"id":"https://openalex.org/keywords/representation","display_name":"Representation (politics)","score":0.522599995136261},{"id":"https://openalex.org/keywords/information-bottleneck-method","display_name":"Information bottleneck method","score":0.5171999931335449},{"id":"https://openalex.org/keywords/key","display_name":"Key (lock)","score":0.48539999127388}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7718999981880188},{"id":"https://openalex.org/C66746571","wikidata":"https://www.wikidata.org/wiki/Q1134833","display_name":"ENCODE","level":3,"score":0.7366999983787537},{"id":"https://openalex.org/C125411270","wikidata":"https://www.wikidata.org/wiki/Q18653","display_name":"Encoding (memory)","level":2,"score":0.669700026512146},{"id":"https://openalex.org/C138673069","wikidata":"https://www.wikidata.org/wiki/Q322229","display_name":"Tracing","level":2,"score":0.6557999849319458},{"id":"https://openalex.org/C118505674","wikidata":"https://www.wikidata.org/wiki/Q42586063","display_name":"Encoder","level":2,"score":0.6477000117301941},{"id":"https://openalex.org/C2780513914","wikidata":"https://www.wikidata.org/wiki/Q18210350","display_name":"Bottleneck","level":2,"score":0.6456999778747559},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5370000004768372},{"id":"https://openalex.org/C2776359362","wikidata":"https://www.wikidata.org/wiki/Q2145286","display_name":"Representation (politics)","level":3,"score":0.522599995136261},{"id":"https://openalex.org/C60008888","wikidata":"https://www.wikidata.org/wiki/Q6031013","display_name":"Information bottleneck method","level":3,"score":0.5171999931335449},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.48539999127388},{"id":"https://openalex.org/C77277458","wikidata":"https://www.wikidata.org/wiki/Q1969246","display_name":"Temporal database","level":2,"score":0.38929998874664307},{"id":"https://openalex.org/C2779136372","wikidata":"https://www.wikidata.org/wiki/Q10283002","display_name":"Information flow","level":2,"score":0.36039999127388},{"id":"https://openalex.org/C64543145","wikidata":"https://www.wikidata.org/wiki/Q162942","display_name":"Intersection (aeronautics)","level":2,"score":0.3571999967098236},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.33399999141693115},{"id":"https://openalex.org/C57273362","wikidata":"https://www.wikidata.org/wiki/Q576722","display_name":"Decoding methods","level":2,"score":0.30979999899864197},{"id":"https://openalex.org/C2776214188","wikidata":"https://www.wikidata.org/wiki/Q408386","display_name":"Inference","level":2,"score":0.3095000088214874},{"id":"https://openalex.org/C119666444","wikidata":"https://www.wikidata.org/wiki/Q5977280","display_name":"Temporal resolution","level":2,"score":0.2881999909877777},{"id":"https://openalex.org/C2780719617","wikidata":"https://www.wikidata.org/wiki/Q1030752","display_name":"Salient","level":2,"score":0.2775000035762787},{"id":"https://openalex.org/C2780339515","wikidata":"https://www.wikidata.org/wiki/Q3074698","display_name":"Arrow","level":2,"score":0.26089999079704285},{"id":"https://openalex.org/C2779903281","wikidata":"https://www.wikidata.org/wiki/Q6888026","display_name":"Modalities","level":2,"score":0.2605000138282776},{"id":"https://openalex.org/C2780226545","wikidata":"https://www.wikidata.org/wiki/Q6888030","display_name":"Modality (human\u2013computer interaction)","level":2,"score":0.2540999948978424}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2605.07568","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2605.07568","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2605.07568","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2605.07568","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"The":[0],"Arrow-of-Time":[1],"(AoT)":[2],"task,":[3],"determining":[4],"whether":[5],"a":[6,38,106,114,125,133,162],"video":[7],"plays":[8],"forward":[9],"or":[10,49],"backward":[11],"by":[12,63,157,187],"recognizing":[13],"temporal":[14,47,73,88,92,117,130,150,184,203,210],"irreversibility,":[15],"is":[16],"one":[17],"humans":[18],"solve":[19],"with":[20,86,164,178],"near-perfect":[21],"accuracy,":[22,180],"yet":[23],"frontier":[24],"Video":[25],"Large":[26],"Language":[27],"Models":[28],"(Video-LLMs)":[29],"perform":[30],"only":[31],"modestly":[32],"above":[33],"chance.":[34],"This":[35],"gap":[36],"raises":[37],"key":[39,126],"question:":[40],"do":[41,97],"visual":[42],"backbones":[43],"fail":[44],"to":[45,142,189,218],"encode":[46,90],"information,":[48,131],"does":[50],"information":[51,74,118,217],"bottleneck":[52,115],"lie":[53],"elsewhere":[54],"in":[55,205],"the":[56,65,69,76,139,219],"Video-LLM":[57,70,108,163],"architecture?":[58],"We":[59,81,120],"address":[60],"this":[61,216],"question":[62],"isolating":[64],"vision":[66],"encoder":[67,154],"from":[68],"and":[71,79,170,181,194,212],"tracing":[72],"across":[75,153],"encoder,":[77,167],"projector,":[78,169],"LLM.":[80,220],"find":[82],"that":[83,202],"video-centric":[84,101,166],"encoders":[85,96],"explicit":[87],"modeling":[89],"strong":[91],"signals,":[93],"whereas":[94],"frame-centric":[95],"not.":[98],"However,":[99],"when":[100],"representations":[102],"are":[103],"passed":[104],"through":[105],"standard":[107],"architecture,":[109],"performance":[110,175],"often":[111],"collapses,":[112],"revealing":[113],"of":[116,215],"flow.":[119],"identify":[121],"projector":[122],"design":[123],"as":[124],"factor:":[127],"Q-Former":[128],"disrupts":[129],"while":[132],"time-preserved":[134,168],"MLP":[135],"projection":[136],"substantially":[137],"improves":[138],"LLM's":[140],"access":[141],"such":[143],"information.":[144],"Our":[145,199],"layer-wise":[146],"analysis":[147],"further":[148],"shows":[149],"representation":[151],"dynamics":[152],"layers.":[155],"Guided":[156],"these":[158],"findings,":[159],"we":[160],"build":[161],"temporal-aware":[165],"AoT":[171],"supervision,":[172],"surpassing":[173],"human":[174],"on":[176,192,197],"AoT$_{PPB}$":[177],"98.1\\%":[179],"improving":[182],"broader":[183],"reasoning":[185,204],"tasks":[186],"up":[188],"6.0":[190],"points":[191,196],"VITATECS-Direction":[193],"1.3":[195],"TVBench.":[198],"results":[200],"show":[201],"Video-LLMs":[206],"requires":[207],"both":[208],"effective":[209],"encoding":[211],"reliable":[213],"transfer":[214]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2026-05-12T00:00:00"}
