{"id":"https://openalex.org/W7164821104","doi":"https://doi.org/10.1145/3805622.3810669","title":"TCRS-QA: Training-Free Chain-of-Thought Reasoning for Shot-Aware Storyline Question Answering in Long-Form Videos","display_name":"TCRS-QA: Training-Free Chain-of-Thought Reasoning for Shot-Aware Storyline Question Answering in Long-Form Videos","publication_year":2026,"publication_date":"2026-06-15","ids":{"openalex":"https://openalex.org/W7164821104","doi":"https://doi.org/10.1145/3805622.3810669"},"language":null,"primary_location":{"id":"doi:10.1145/3805622.3810669","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3805622.3810669","pdf_url":null,"source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2026 International Conference on Multimedia Retrieval","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://doi.org/10.1145/3805622.3810669","any_repository_has_fulltext":null},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5014154873","display_name":"Zeng Zhenpeng","orcid":null},"institutions":[{"id":"https://openalex.org/I75689368","display_name":"Communication University of China","ror":"https://ror.org/04facbs33","country_code":"CN","type":"education","lineage":["https://openalex.org/I75689368"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Zhenpeng Zeng","raw_affiliation_strings":["Communication University of China, Beijing, China"],"raw_orcid":"https://orcid.org/0009-0004-6827-139X","affiliations":[{"raw_affiliation_string":"Communication University of China, Beijing, China","institution_ids":["https://openalex.org/I75689368"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100357651","display_name":"Xiaoyu Wu","orcid":"https://orcid.org/0000-0003-3481-7820"},"institutions":[{"id":"https://openalex.org/I75689368","display_name":"Communication University of China","ror":"https://ror.org/04facbs33","country_code":"CN","type":"education","lineage":["https://openalex.org/I75689368"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xiaoyu Wu","raw_affiliation_strings":["Communication University of China, Beijing, China"],"raw_orcid":"https://orcid.org/0000-0003-3481-7820","affiliations":[{"raw_affiliation_string":"Communication University of China, Beijing, China","institution_ids":["https://openalex.org/I75689368"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5089797066","display_name":"Xuxu Wang","orcid":"https://orcid.org/0000-0002-6782-4536"},"institutions":[{"id":"https://openalex.org/I75689368","display_name":"Communication University of China","ror":"https://ror.org/04facbs33","country_code":"CN","type":"education","lineage":["https://openalex.org/I75689368"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xuxu Wang","raw_affiliation_strings":["Communication University of China, Beijing, China"],"raw_orcid":"https://orcid.org/0009-0000-8096-7243","affiliations":[{"raw_affiliation_string":"Communication University of China, Beijing, China","institution_ids":["https://openalex.org/I75689368"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5138633398","display_name":"Qian Yu","orcid":"https://orcid.org/0009-0005-7694-0829"},"institutions":[{"id":"https://openalex.org/I75689368","display_name":"Communication University of China","ror":"https://ror.org/04facbs33","country_code":"CN","type":"education","lineage":["https://openalex.org/I75689368"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Qian Yu","raw_affiliation_strings":["Communication University of China, Beijing, China"],"raw_orcid":"https://orcid.org/0009-0005-7694-0829","affiliations":[{"raw_affiliation_string":"Communication University of China, Beijing, China","institution_ids":["https://openalex.org/I75689368"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5121831294","display_name":"Yudong Wang","orcid":null},"institutions":[{"id":"https://openalex.org/I75689368","display_name":"Communication University of China","ror":"https://ror.org/04facbs33","country_code":"CN","type":"education","lineage":["https://openalex.org/I75689368"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yudong Wang","raw_affiliation_strings":["Communication University of China, Beijing, China"],"raw_orcid":"https://orcid.org/0009-0003-9145-8147","affiliations":[{"raw_affiliation_string":"Communication University of China, Beijing, China","institution_ids":["https://openalex.org/I75689368"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5037194804","display_name":"Zihao Liu","orcid":"https://orcid.org/0000-0002-0917-001X"},"institutions":[{"id":"https://openalex.org/I75689368","display_name":"Communication University of China","ror":"https://ror.org/04facbs33","country_code":"CN","type":"education","lineage":["https://openalex.org/I75689368"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Zihao Liu","raw_affiliation_strings":["Communication University of China, Beijing, China"],"raw_orcid":"https://orcid.org/0000-0002-0917-001X","affiliations":[{"raw_affiliation_string":"Communication University of China, Beijing, China","institution_ids":["https://openalex.org/I75689368"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":6,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.93544177,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"1514","last_page":"1522"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.7767999768257141,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.7767999768257141,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11439","display_name":"Video Analysis and Summarization","score":0.0714000016450882,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.04989999905228615,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/question-answering","display_name":"Question answering","score":0.7648000121116638},{"id":"https://openalex.org/keywords/narrative","display_name":"Narrative","score":0.6507999897003174},{"id":"https://openalex.org/keywords/clips","display_name":"CLIPS","score":0.5490999817848206},{"id":"https://openalex.org/keywords/inference","display_name":"Inference","score":0.5241000056266785},{"id":"https://openalex.org/keywords/filter","display_name":"Filter (signal processing)","score":0.4916999936103821},{"id":"https://openalex.org/keywords/perception","display_name":"Perception","score":0.4731999933719635},{"id":"https://openalex.org/keywords/iterative-and-incremental-development","display_name":"Iterative and incremental development","score":0.3991999924182892},{"id":"https://openalex.org/keywords/search-engine-indexing","display_name":"Search engine indexing","score":0.38019999861717224},{"id":"https://openalex.org/keywords/selection","display_name":"Selection (genetic algorithm)","score":0.37619999051094055}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7796000242233276},{"id":"https://openalex.org/C44291984","wikidata":"https://www.wikidata.org/wiki/Q1074173","display_name":"Question answering","level":2,"score":0.7648000121116638},{"id":"https://openalex.org/C199033989","wikidata":"https://www.wikidata.org/wiki/Q1318295","display_name":"Narrative","level":2,"score":0.6507999897003174},{"id":"https://openalex.org/C2778739407","wikidata":"https://www.wikidata.org/wiki/Q165372","display_name":"CLIPS","level":2,"score":0.5490999817848206},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5432000160217285},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.5291000008583069},{"id":"https://openalex.org/C2776214188","wikidata":"https://www.wikidata.org/wiki/Q408386","display_name":"Inference","level":2,"score":0.5241000056266785},{"id":"https://openalex.org/C106131492","wikidata":"https://www.wikidata.org/wiki/Q3072260","display_name":"Filter (signal processing)","level":2,"score":0.4916999936103821},{"id":"https://openalex.org/C26760741","wikidata":"https://www.wikidata.org/wiki/Q160402","display_name":"Perception","level":2,"score":0.4731999933719635},{"id":"https://openalex.org/C143587482","wikidata":"https://www.wikidata.org/wiki/Q1543216","display_name":"Iterative and incremental development","level":2,"score":0.3991999924182892},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.38269999623298645},{"id":"https://openalex.org/C75165309","wikidata":"https://www.wikidata.org/wiki/Q2258979","display_name":"Search engine indexing","level":2,"score":0.38019999861717224},{"id":"https://openalex.org/C81917197","wikidata":"https://www.wikidata.org/wiki/Q628760","display_name":"Selection (genetic algorithm)","level":2,"score":0.37619999051094055},{"id":"https://openalex.org/C2778751112","wikidata":"https://www.wikidata.org/wiki/Q835016","display_name":"Window (computing)","level":2,"score":0.3626999855041504},{"id":"https://openalex.org/C184337299","wikidata":"https://www.wikidata.org/wiki/Q1437428","display_name":"Semantics (computer science)","level":2,"score":0.34139999747276306},{"id":"https://openalex.org/C2778344882","wikidata":"https://www.wikidata.org/wiki/Q278938","display_name":"Shot (pellet)","level":2,"score":0.3260999917984009},{"id":"https://openalex.org/C2164484","wikidata":"https://www.wikidata.org/wiki/Q5170150","display_name":"Core (optical fiber)","level":2,"score":0.3138999938964844},{"id":"https://openalex.org/C2776321320","wikidata":"https://www.wikidata.org/wiki/Q857525","display_name":"Annotation","level":2,"score":0.30379998683929443},{"id":"https://openalex.org/C125411270","wikidata":"https://www.wikidata.org/wiki/Q18653","display_name":"Encoding (memory)","level":2,"score":0.3037000000476837},{"id":"https://openalex.org/C2992734406","wikidata":"https://www.wikidata.org/wiki/Q413267","display_name":"One shot","level":2,"score":0.27619999647140503},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.26919999718666077},{"id":"https://openalex.org/C61797465","wikidata":"https://www.wikidata.org/wiki/Q1188986","display_name":"Term (time)","level":2,"score":0.2660999894142151},{"id":"https://openalex.org/C2777508537","wikidata":"https://www.wikidata.org/wiki/Q7936620","display_name":"Visual reasoning","level":2,"score":0.2660999894142151},{"id":"https://openalex.org/C175154964","wikidata":"https://www.wikidata.org/wiki/Q380077","display_name":"Task analysis","level":3,"score":0.25450000166893005},{"id":"https://openalex.org/C2779982483","wikidata":"https://www.wikidata.org/wiki/Q6094420","display_name":"Iterative refinement","level":2,"score":0.2515999972820282}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3805622.3810669","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3805622.3810669","pdf_url":null,"source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2026 International Conference on Multimedia Retrieval","raw_type":"proceedings-article"}],"best_oa_location":{"id":"doi:10.1145/3805622.3810669","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3805622.3810669","pdf_url":null,"source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2026 International Conference on Multimedia Retrieval","raw_type":"proceedings-article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":35,"referenced_works":["https://openalex.org/W2139009685","https://openalex.org/W2529272619","https://openalex.org/W2606982687","https://openalex.org/W2765716052","https://openalex.org/W2904452845","https://openalex.org/W2963315828","https://openalex.org/W2963541336","https://openalex.org/W2963890755","https://openalex.org/W3034730770","https://openalex.org/W3162565403","https://openalex.org/W3197457832","https://openalex.org/W3203711169","https://openalex.org/W4295101003","https://openalex.org/W4312271977","https://openalex.org/W4312403713","https://openalex.org/W4312864639","https://openalex.org/W4390871765","https://openalex.org/W4402727272","https://openalex.org/W4402753830","https://openalex.org/W4402754220","https://openalex.org/W4402777385","https://openalex.org/W4403725941","https://openalex.org/W4409262277","https://openalex.org/W4413145412","https://openalex.org/W4413145687","https://openalex.org/W4413146116","https://openalex.org/W4413146669","https://openalex.org/W4413147219","https://openalex.org/W4413147303","https://openalex.org/W4413147578","https://openalex.org/W4413147823","https://openalex.org/W4415537069","https://openalex.org/W4415540635","https://openalex.org/W7133196460","https://openalex.org/W7133220561"],"related_works":[],"abstract_inverted_index":{"Long-form":[0],"video":[1,12],"understanding":[2],"is":[3],"crucial":[4],"for":[5,79,107],"tasks":[6],"such":[7],"as":[8,17,66],"movie":[9],"comprehension,":[10],"where":[11],"question":[13],"answering":[14],"(videoQA)":[15],"serves":[16],"a":[18,51],"core":[19],"capability.":[20],"Existing":[21],"videoQA":[22],"systems":[23],"struggle":[24],"with":[25,59],"temporally":[26],"scattered":[27],"evidence":[28,46,77],"and":[29,86,102,110,127,133],"cross-modal":[30],"misalignment;":[31],"models":[32],"fine-tuned":[33],"on":[34,40,131],"short":[35],"clips":[36],"often":[37],"degrade":[38],"sharply":[39],"long-form":[41],"benchmarks":[42],"due":[43],"to":[44,89,97],"fragmented":[45],"reasoning.":[47],"We":[48],"propose":[49],"TCRS-QA,":[50],"multi-stage,":[52],"training-free":[53],"framework":[54],"that":[55],"unifies":[56],"fine-grained":[57],"perception":[58],"iterative":[60,104],"reasoning":[61,106],"by":[62],"treating":[63],"each":[64],"shot":[65],"the":[67,113],"atomic":[68],"unit.":[69],"TCRS-QA":[70,123],"integrates":[71],"four":[72],"complementary":[73],"modules:":[74],"(i)":[75],"hierarchical":[76],"retrieval":[78],"diverse":[80],"keyframe":[81],"selection,":[82],"(ii)":[83],"semantic":[84],"alignment":[85],"textual":[87],"grounding":[88],"filter":[90],"shot-level":[91],"descriptions,":[92],"(iii)":[93],"narrative":[94,99],"rhythm":[95],"detection":[96],"identify":[98],"phase":[100],"transitions,":[101],"(iv)":[103],"chain-of-thought":[105],"multimodal":[108],"inference":[109],"verification.":[111],"On":[112],"ICCV":[114],"2025":[115],"SF20K":[116],"Story-Level":[117],"Movie":[118],"Understanding":[119],"Audio":[120],"Description":[121],"Challenge,":[122],"achieves":[124],"state-of-the-art":[125],"performance,":[126],"delivers":[128],"superior":[129],"performance":[130],"Video-MME":[132],"LongVideoBench":[134],"datasets.":[135]},"counts_by_year":[],"updated_date":"2026-06-16T07:37:23.134862","created_date":"2026-06-16T00:00:00"}
