{"id":"https://openalex.org/W7164814965","doi":"https://doi.org/10.1145/3805622.3810743","title":"TD-CoT: Bridging the Holistic-Atomic Gap for Training-Free Temporal Reversal Detection in Video LVLMs","display_name":"TD-CoT: Bridging the Holistic-Atomic Gap for Training-Free Temporal Reversal Detection in Video LVLMs","publication_year":2026,"publication_date":"2026-06-15","ids":{"openalex":"https://openalex.org/W7164814965","doi":"https://doi.org/10.1145/3805622.3810743"},"language":null,"primary_location":{"id":"doi:10.1145/3805622.3810743","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3805622.3810743","pdf_url":null,"source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2026 International Conference on Multimedia Retrieval","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://doi.org/10.1145/3805622.3810743","any_repository_has_fulltext":null},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5138648573","display_name":"Pengfei Huang","orcid":"https://orcid.org/0009-0003-9349-0642"},"institutions":[{"id":"https://openalex.org/I92403157","display_name":"University of Science and Technology Beijing","ror":"https://ror.org/02egmk993","country_code":"CN","type":"education","lineage":["https://openalex.org/I92403157"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Pengfei Huang","raw_affiliation_strings":["School of Automation and Electrical Engineering, University of Science and Technology Beijing, Beijing, China"],"raw_orcid":"https://orcid.org/0009-0003-9349-0642","affiliations":[{"raw_affiliation_string":"School of Automation and Electrical Engineering, University of Science and Technology Beijing, Beijing, China","institution_ids":["https://openalex.org/I92403157"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5138630704","display_name":"Xuezhen Hou","orcid":"https://orcid.org/0009-0001-3193-8531"},"institutions":[{"id":"https://openalex.org/I92403157","display_name":"University of Science and Technology Beijing","ror":"https://ror.org/02egmk993","country_code":"CN","type":"education","lineage":["https://openalex.org/I92403157"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xuezhen Hou","raw_affiliation_strings":["School of Mathematics and Physics, University of Science and Technology Beijing, Beijing, China"],"raw_orcid":"https://orcid.org/0009-0001-3193-8531","affiliations":[{"raw_affiliation_string":"School of Mathematics and Physics, University of Science and Technology Beijing, Beijing, China","institution_ids":["https://openalex.org/I92403157"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":2,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.93468515,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"1542","last_page":"1546"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.8992000222206116,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.8992000222206116,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11307","display_name":"Domain Adaptation and Few-Shot Learning","score":0.024800000712275505,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11689","display_name":"Adversarial Robustness in Machine Learning","score":0.015699999406933784,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/bridging","display_name":"Bridging (networking)","score":0.6621999740600586},{"id":"https://openalex.org/keywords/inference","display_name":"Inference","score":0.5795000195503235},{"id":"https://openalex.org/keywords/reliability","display_name":"Reliability (semiconductor)","score":0.4074999988079071},{"id":"https://openalex.org/keywords/adversarial-system","display_name":"Adversarial system","score":0.40389999747276306},{"id":"https://openalex.org/keywords/bridge","display_name":"Bridge (graph theory)","score":0.4018000066280365},{"id":"https://openalex.org/keywords/temporal-database","display_name":"Temporal database","score":0.38119998574256897},{"id":"https://openalex.org/keywords/state","display_name":"State (computer science)","score":0.38029998540878296},{"id":"https://openalex.org/keywords/temporal-logic","display_name":"Temporal logic","score":0.37049999833106995}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.770799994468689},{"id":"https://openalex.org/C174348530","wikidata":"https://www.wikidata.org/wiki/Q188635","display_name":"Bridging (networking)","level":2,"score":0.6621999740600586},{"id":"https://openalex.org/C2776214188","wikidata":"https://www.wikidata.org/wiki/Q408386","display_name":"Inference","level":2,"score":0.5795000195503235},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5442000031471252},{"id":"https://openalex.org/C43214815","wikidata":"https://www.wikidata.org/wiki/Q7310987","display_name":"Reliability (semiconductor)","level":3,"score":0.4074999988079071},{"id":"https://openalex.org/C37736160","wikidata":"https://www.wikidata.org/wiki/Q1801315","display_name":"Adversarial system","level":2,"score":0.40389999747276306},{"id":"https://openalex.org/C100776233","wikidata":"https://www.wikidata.org/wiki/Q2532492","display_name":"Bridge (graph theory)","level":2,"score":0.4018000066280365},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.39800000190734863},{"id":"https://openalex.org/C77277458","wikidata":"https://www.wikidata.org/wiki/Q1969246","display_name":"Temporal database","level":2,"score":0.38119998574256897},{"id":"https://openalex.org/C48103436","wikidata":"https://www.wikidata.org/wiki/Q599031","display_name":"State (computer science)","level":2,"score":0.38029998540878296},{"id":"https://openalex.org/C25016198","wikidata":"https://www.wikidata.org/wiki/Q781833","display_name":"Temporal logic","level":2,"score":0.37049999833106995},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.3456999957561493},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.3402999937534332},{"id":"https://openalex.org/C80444323","wikidata":"https://www.wikidata.org/wiki/Q2878974","display_name":"Theoretical computer science","level":1,"score":0.3208000063896179},{"id":"https://openalex.org/C196340769","wikidata":"https://www.wikidata.org/wiki/Q7698910","display_name":"Temporal difference learning","level":3,"score":0.31529998779296875},{"id":"https://openalex.org/C119666444","wikidata":"https://www.wikidata.org/wiki/Q5977280","display_name":"Temporal resolution","level":2,"score":0.31049999594688416},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.29280000925064087},{"id":"https://openalex.org/C162670838","wikidata":"https://www.wikidata.org/wiki/Q6057295","display_name":"Interval temporal logic","level":3,"score":0.2903999984264374},{"id":"https://openalex.org/C124681953","wikidata":"https://www.wikidata.org/wiki/Q339062","display_name":"Decomposition","level":2,"score":0.2777999937534332},{"id":"https://openalex.org/C205711294","wikidata":"https://www.wikidata.org/wiki/Q176953","display_name":"Rendering (computer graphics)","level":2,"score":0.27469998598098755},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.26429998874664307},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.263700008392334},{"id":"https://openalex.org/C48044578","wikidata":"https://www.wikidata.org/wiki/Q727490","display_name":"Scalability","level":2,"score":0.26260000467300415},{"id":"https://openalex.org/C2778112365","wikidata":"https://www.wikidata.org/wiki/Q3511065","display_name":"Sequence (biology)","level":2,"score":0.25839999318122864},{"id":"https://openalex.org/C125411270","wikidata":"https://www.wikidata.org/wiki/Q18653","display_name":"Encoding (memory)","level":2,"score":0.25619998574256897},{"id":"https://openalex.org/C145912823","wikidata":"https://www.wikidata.org/wiki/Q113558","display_name":"Dynamics (music)","level":2,"score":0.25209999084472656},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.2500999867916107}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3805622.3810743","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3805622.3810743","pdf_url":null,"source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2026 International Conference on Multimedia Retrieval","raw_type":"proceedings-article"}],"best_oa_location":{"id":"doi:10.1145/3805622.3810743","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3805622.3810743","pdf_url":null,"source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2026 International Conference on Multimedia Retrieval","raw_type":"proceedings-article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":14,"referenced_works":["https://openalex.org/W2425121537","https://openalex.org/W4281483047","https://openalex.org/W4392427296","https://openalex.org/W4402671548","https://openalex.org/W4402727142","https://openalex.org/W4404784276","https://openalex.org/W4405172832","https://openalex.org/W4405429615","https://openalex.org/W4411113516","https://openalex.org/W4413145669","https://openalex.org/W4413146669","https://openalex.org/W4416330975","https://openalex.org/W7127448947","https://openalex.org/W7133224126"],"related_works":[],"abstract_inverted_index":{"While":[0],"Large":[1],"Vision-Language":[2],"Models":[3],"(LVLMs)":[4],"demonstrate":[5,125],"impressive":[6],"capabilities":[7],"in":[8],"static":[9],"image":[10],"understanding,":[11],"their":[12],"ability":[13],"to":[14,30,51,115],"reason":[15],"about":[16],"temporal":[17,54,86,98,116,130],"order":[18],"remains":[19],"limited.":[20],"Specifically,":[21],"on":[22,120,135],"adversarial":[23],"reversal":[24],"benchmarks,":[25],"state-of-the-art":[26],"models":[27,48],"often":[28],"degrade":[29],"chance-level":[31],"performance":[32],"despite":[33],"accurately":[34],"recognizing":[35],"the":[36,61],"visual":[37],"content.":[38],"We":[39],"characterize":[40],"this":[41,71,111],"phenomenon":[42],"as":[43],"a":[44,79],"holistic\u2013atomic":[45],"accuracy":[46,132],"gap:":[47],"that":[49,126],"fail":[50],"answer":[52],"video-level":[53],"queries":[55,87],"(58%":[56],"accuracy)":[57],"can":[58],"surprisingly":[59],"verify":[60],"constituent":[62],"atomic":[63],"states":[64],"with":[65],"high":[66],"reliability":[67],"(87%).":[68],"To":[69],"bridge":[70],"gap,":[72],"we":[73],"propose":[74],"TD-CoT":[75,83,127],"(Temporal":[76],"Decomposition":[77],"Chain-of-Thought),":[78],"training-free":[80],"inference":[81],"framework.":[82],"decomposes":[84],"complex":[85],"into":[88],"directional":[89],"state":[90],"triplets":[91],"and":[92,123],"grounds":[93],"each":[94],"sub-state":[95],"onto":[96],"corresponding":[97],"segments":[99],"for":[100],"sequential":[101],"verification.":[102],"An":[103],"adaptive":[104],"router":[105],"ensures":[106],"computational":[107],"efficiency":[108],"by":[109],"applying":[110],"structured":[112],"reasoning":[113,131],"exclusively":[114],"queries.":[117],"Extensive":[118],"experiments":[119],"MVBench,":[121],"TempCompass,":[122],"TimeBlind":[124],"significantly":[128],"improves":[129],"(e.g.,":[133],"+11.7%":[134],"InternVL2)":[136],"without":[137],"compromising":[138],"general":[139],"video":[140],"QA":[141],"performance.":[142]},"counts_by_year":[],"updated_date":"2026-06-16T07:37:23.134862","created_date":"2026-06-16T00:00:00"}
