{"id":"https://openalex.org/W7160857276","doi":"https://doi.org/10.48550/arxiv.2605.07593","title":"TraceAV-Bench: Benchmarking Multi-Hop Trajectory Reasoning over Long Audio-Visual Videos","display_name":"TraceAV-Bench: Benchmarking Multi-Hop Trajectory Reasoning over Long Audio-Visual Videos","publication_year":2026,"publication_date":"2026-05-08","ids":{"openalex":"https://openalex.org/W7160857276","doi":"https://doi.org/10.48550/arxiv.2605.07593"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2605.07593","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2605.07593","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2605.07593","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5077568052","display_name":"Hengyi Feng","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Feng, Hengyi","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5135873942","display_name":"Hao Liang","orcid":"https://orcid.org/0009-0007-4832-0694"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Liang, Hao","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5135836228","display_name":"Mingrui Chen","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Chen, Mingrui","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5135867238","display_name":"Bohan Zeng","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zeng, Bohan","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5120132025","display_name":"Meiyi Qiang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Qiang, Meiyi","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5081856566","display_name":"Zhengyang Zhao","orcid":"https://orcid.org/0000-0002-8017-3635"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhao, Zhengyang","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5135857227","display_name":"Zimo Meng","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Meng, Zimo","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5065994224","display_name":"Zeang Sheng","orcid":"https://orcid.org/0009-0002-4427-3038"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Sheng, Zeang","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5135830667","display_name":"Wentao Zhang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhang, Wentao","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":9,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.5037000179290771,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.5037000179290771,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.1736000031232834,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10667","display_name":"Emotion and Mood Recognition","score":0.04410000145435333,"subfield":{"id":"https://openalex.org/subfields/3205","display_name":"Experimental and Cognitive Psychology"},"field":{"id":"https://openalex.org/fields/32","display_name":"Psychology"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/benchmarking","display_name":"Benchmarking","score":0.6341999769210815},{"id":"https://openalex.org/keywords/robustness","display_name":"Robustness (evolution)","score":0.6310999989509583},{"id":"https://openalex.org/keywords/benchmark","display_name":"Benchmark (surveying)","score":0.5972999930381775},{"id":"https://openalex.org/keywords/backward-chaining","display_name":"Backward chaining","score":0.4729999899864197},{"id":"https://openalex.org/keywords/visual-reasoning","display_name":"Visual reasoning","score":0.45820000767707825},{"id":"https://openalex.org/keywords/abductive-reasoning","display_name":"Abductive reasoning","score":0.43860000371932983},{"id":"https://openalex.org/keywords/chaining","display_name":"Chaining","score":0.41830000281333923},{"id":"https://openalex.org/keywords/forward-chaining","display_name":"Forward chaining","score":0.38019999861717224}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7095999717712402},{"id":"https://openalex.org/C86251818","wikidata":"https://www.wikidata.org/wiki/Q816754","display_name":"Benchmarking","level":2,"score":0.6341999769210815},{"id":"https://openalex.org/C63479239","wikidata":"https://www.wikidata.org/wiki/Q7353546","display_name":"Robustness (evolution)","level":3,"score":0.6310999989509583},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6049000024795532},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.5972999930381775},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.5049999952316284},{"id":"https://openalex.org/C129916263","wikidata":"https://www.wikidata.org/wiki/Q1141183","display_name":"Backward chaining","level":4,"score":0.4729999899864197},{"id":"https://openalex.org/C2777508537","wikidata":"https://www.wikidata.org/wiki/Q7936620","display_name":"Visual reasoning","level":2,"score":0.45820000767707825},{"id":"https://openalex.org/C166088908","wikidata":"https://www.wikidata.org/wiki/Q308495","display_name":"Abductive reasoning","level":2,"score":0.43860000371932983},{"id":"https://openalex.org/C49020025","wikidata":"https://www.wikidata.org/wiki/Q1059099","display_name":"Chaining","level":2,"score":0.41830000281333923},{"id":"https://openalex.org/C142614401","wikidata":"https://www.wikidata.org/wiki/Q777433","display_name":"Forward chaining","level":3,"score":0.38019999861717224},{"id":"https://openalex.org/C43521106","wikidata":"https://www.wikidata.org/wiki/Q2165493","display_name":"Pipeline (software)","level":2,"score":0.3222000002861023},{"id":"https://openalex.org/C13662910","wikidata":"https://www.wikidata.org/wiki/Q193139","display_name":"Trajectory","level":2,"score":0.2806999981403351},{"id":"https://openalex.org/C86827895","wikidata":"https://www.wikidata.org/wiki/Q7098582","display_name":"Opportunistic reasoning","level":4,"score":0.28029999136924744},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.2793000042438507},{"id":"https://openalex.org/C2776214188","wikidata":"https://www.wikidata.org/wiki/Q408386","display_name":"Inference","level":2,"score":0.27630001306533813},{"id":"https://openalex.org/C37335422","wikidata":"https://www.wikidata.org/wiki/Q6888134","display_name":"Model-based reasoning","level":3,"score":0.2759000062942505},{"id":"https://openalex.org/C89288958","wikidata":"https://www.wikidata.org/wiki/Q7301504","display_name":"Reasoning system","level":2,"score":0.27129998803138733},{"id":"https://openalex.org/C177148314","wikidata":"https://www.wikidata.org/wiki/Q170084","display_name":"Generalization","level":2,"score":0.2712000012397766},{"id":"https://openalex.org/C2779525943","wikidata":"https://www.wikidata.org/wiki/Q1187300","display_name":"Grammaticality","level":3,"score":0.2685999870300293},{"id":"https://openalex.org/C2779530757","wikidata":"https://www.wikidata.org/wiki/Q1207505","display_name":"Quality (philosophy)","level":2,"score":0.2612000107765198},{"id":"https://openalex.org/C2780910867","wikidata":"https://www.wikidata.org/wiki/Q1952416","display_name":"Multimodality","level":2,"score":0.25699999928474426},{"id":"https://openalex.org/C195344581","wikidata":"https://www.wikidata.org/wiki/Q2555318","display_name":"Automated reasoning","level":2,"score":0.2558000087738037}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2605.07593","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2605.07593","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2605.07593","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2605.07593","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Real-world":[0],"audio-visual":[1,55,193],"understanding":[2],"requires":[3],"chaining":[4],"evidence":[5],"that":[6,91,124,162,177,185],"is":[7,84,102,167],"sparse,":[8],"temporally":[9],"dispersed,":[10],"and":[11,16,57,79,148,189],"split":[12],"across":[13,95,131],"the":[14,45,125,135,149],"visual":[15],"auditory":[17],"streams,":[18],"whereas":[19],"existing":[20],"benchmarks":[21],"largely":[22,168],"fail":[23],"to":[24,31,39,48,164],"evaluate":[25,50],"this":[26],"capability.":[27],"They":[28],"restrict":[29],"videos":[30],"short":[32],"clips,":[33],"isolate":[34],"modalities,":[35],"or":[36],"reduce":[37],"questions":[38,67],"one-hop":[40],"perception.":[41],"We":[42,175],"introduce":[43],"TraceAV-Bench,":[44],"first":[46],"benchmark":[47,126],"jointly":[49],"multi-hop":[51],"reasoning":[52,89,173],"over":[53,68,191],"long":[54,70],"trajectories":[56],"multimodal":[58,165,172],"hallucination":[59,166],"robustness.":[60],"TraceAV-Bench":[61,122,178],"comprises":[62],"2,200":[63],"rigorously":[64],"validated":[65],"multiple-choice":[66],"578":[69],"videos,":[71],"totaling":[72],"339.5":[73],"hours,":[74],"spanning":[75],"4":[76],"evaluation":[77],"dimensions":[78],"15":[80],"sub-tasks.":[81],"Each":[82],"question":[83],"grounded":[85],"in":[86],"an":[87],"explicit":[88],"chain":[90],"averages":[92],"3.68":[93],"hops":[94],"a":[96,105,111,128],"15.1-minute":[97],"temporal":[98],"span.":[99],"The":[100],"dataset":[101],"built":[103],"by":[104,110],"three-step":[106],"semi-automated":[107],"pipeline":[108],"followed":[109],"strict":[112],"quality":[113],"assurance":[114],"process.":[115],"Evaluation":[116],"of":[117],"multiple":[118],"representative":[119],"OmniLLMs":[120,184],"on":[121,145],"reveals":[123],"poses":[127],"persistent":[129],"challenge":[130],"all":[132],"models,":[133],"with":[134],"strongest":[136],"closed-source":[137],"model":[138,152],"(Gemini":[139],"3.1":[140],"Pro)":[141],"reaching":[142,154],"only":[143],"68.29%":[144],"general":[146,171],"tasks,":[147],"best":[150],"open-source":[151],"(Ming-Flash-Omni-2.0)":[153],"51.70%,":[155],"leaving":[156],"substantial":[157],"headroom.":[158],"Moreover,":[159],"we":[160],"find":[161],"robustness":[163],"decoupled":[169],"from":[170],"performance.":[174],"anticipate":[176],"will":[179],"stimulate":[180],"further":[181],"research":[182],"toward":[183],"can":[186],"reason":[187],"coherently":[188],"faithfully":[190],"long-form":[192],"content.":[194]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2026-05-12T00:00:00"}
