{"id":"https://openalex.org/W7160913171","doi":"https://doi.org/10.48550/arxiv.2605.10434","title":"WorldReasonBench: Human-Aligned Stress Testing of Video Generators as Future World-State Predictors","display_name":"WorldReasonBench: Human-Aligned Stress Testing of Video Generators as Future World-State Predictors","publication_year":2026,"publication_date":"2026-05-11","ids":{"openalex":"https://openalex.org/W7160913171","doi":"https://doi.org/10.48550/arxiv.2605.10434"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2605.10434","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2605.10434","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2605.10434","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5135914898","display_name":"Keming Wu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wu, Keming","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5135989500","display_name":"Yijing Cui","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Cui, Yijing","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5120097996","display_name":"Wenhan Xue","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Xue, Wenhan","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5135921553","display_name":"Qijie Wang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wang, Qijie","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5135936894","display_name":"Xuan Luo","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Luo, Xuan","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5135941048","display_name":"Zhiyuan Feng","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Feng, Zhiyuan","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5088457247","display_name":"Zuhao Yang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yang, Zuhao","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5135994449","display_name":"Sudong Wang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wang, Sudong","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5135971668","display_name":"Sicong Jiang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Jiang, Sicong","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5135920261","display_name":"Haowei Zhu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhu, Haowei","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5135956658","display_name":"Zihan Wang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wang, Zihan","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5135981353","display_name":"Ping Nie","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Nie, Ping","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5135956331","display_name":"Wenhu Chen","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Chen, Wenhu","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5135959250","display_name":"Bin Wang","orcid":"https://orcid.org/0000-0002-6812-235X"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wang, Bin","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":14,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10775","display_name":"Generative Adversarial Networks and Image Synthesis","score":0.7358999848365784,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10775","display_name":"Generative Adversarial Networks and Image Synthesis","score":0.7358999848365784,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.04500000178813934,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10812","display_name":"Human Pose and Action Recognition","score":0.03660000115633011,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/benchmark","display_name":"Benchmark (surveying)","score":0.7107999920845032},{"id":"https://openalex.org/keywords/ranking","display_name":"Ranking (information retrieval)","score":0.6251000165939331},{"id":"https://openalex.org/keywords/state","display_name":"State (computer science)","score":0.48510000109672546},{"id":"https://openalex.org/keywords/video-quality","display_name":"Video quality","score":0.4625000059604645},{"id":"https://openalex.org/keywords/quality","display_name":"Quality (philosophy)","score":0.4521999955177307},{"id":"https://openalex.org/keywords/test","display_name":"Test (biology)","score":0.3806000053882599}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7139000296592712},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.7107999920845032},{"id":"https://openalex.org/C189430467","wikidata":"https://www.wikidata.org/wiki/Q7293293","display_name":"Ranking (information retrieval)","level":2,"score":0.6251000165939331},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5174000263214111},{"id":"https://openalex.org/C48103436","wikidata":"https://www.wikidata.org/wiki/Q599031","display_name":"State (computer science)","level":2,"score":0.48510000109672546},{"id":"https://openalex.org/C103910844","wikidata":"https://www.wikidata.org/wiki/Q2631256","display_name":"Video quality","level":3,"score":0.4625000059604645},{"id":"https://openalex.org/C2779530757","wikidata":"https://www.wikidata.org/wiki/Q1207505","display_name":"Quality (philosophy)","level":2,"score":0.4521999955177307},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.40849998593330383},{"id":"https://openalex.org/C2777267654","wikidata":"https://www.wikidata.org/wiki/Q3519023","display_name":"Test (biology)","level":2,"score":0.3806000053882599},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.3179999887943268},{"id":"https://openalex.org/C2781249084","wikidata":"https://www.wikidata.org/wiki/Q908656","display_name":"Preference","level":2,"score":0.3028999865055084},{"id":"https://openalex.org/C2988634675","wikidata":"https://www.wikidata.org/wiki/Q34508","display_name":"Video recording","level":2,"score":0.25119999051094055},{"id":"https://openalex.org/C65483669","wikidata":"https://www.wikidata.org/wiki/Q3536669","display_name":"Video processing","level":2,"score":0.250900000333786}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2605.10434","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2605.10434","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2605.10434","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2605.10434","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Commercial":[0],"video":[1,16,53,72,165,205],"generation":[2,54,206],"systems":[3],"such":[4],"as":[5,56],"Seedance2.0":[6],"and":[7,63,80,98,116,122,134,139,159,176,195],"Veo3.1":[8],"have":[9],"rapidly":[10],"improved,":[11],"strengthening":[12],"the":[13,25],"view":[14],"that":[15,31],"generators":[17],"may":[18],"be":[19],"evolving":[20],"into":[21],"\"world":[22],"simulators.\"":[23],"Yet":[24],"community":[26,200],"still":[27],"lacks":[28],"a":[29,35,67,70,106,146,170],"benchmark":[30,148],"directly":[32],"tests":[33],"whether":[34],"model":[36,68],"can":[37,66,180],"reason":[38],"about":[39],"how":[40],"an":[41,60,64],"observed":[42],"world":[43,177],"should":[44],"evolve":[45],"over":[46,154],"time.":[47],"We":[48,101,142,190],"introduce":[49,144],"WorldReasonBench,":[50],"which":[51],"reframes":[52],"evaluation":[55,196],"world-state":[57],"prediction:":[58],"given":[59],"initial":[61],"state":[62,74],"action,":[65],"generate":[69],"future":[71],"whose":[73],"evolution":[75],"remains":[76],"physically,":[77],"socially,":[78],"logically,":[79],"informationally":[81],"consistent?":[82],"WorldReasonBench":[83],"contains":[84],"436":[85],"curated":[86],"test":[87],"cases":[88],"with":[89,105,149],"structured":[90,114],"ground-truth":[91],"QA":[92,115],"annotations":[93],"spanning":[94],"four":[95],"reasoning":[96,130],"dimensions":[97],"22":[99],"subcategories.":[100],"evaluate":[102],"generated":[103],"videos":[104,179],"human-aligned":[107],"two-part":[108],"methodology:":[109],"Process-aware":[110],"Reasoning":[111],"Verification":[112],"uses":[113],"reasoning-phase":[117],"diagnostics":[118],"to":[119,198],"detect":[120],"temporal":[121,132],"causal":[123],"failures,":[124],"while":[125,183],"Multi-dimensional":[126],"Quality":[127],"Assessment":[128],"scores":[129],"quality,":[131],"consistency,":[133],"visual":[135,174],"aesthetics":[136],"for":[137],"ranking":[138],"reward":[140],"modeling.":[141],"further":[143],"WorldRewardBench,":[145],"preference":[147],"approximately":[150],"6K":[151],"expert-annotated":[152],"pairs":[153],"1.4K":[155],"videos,":[156],"supporting":[157],"pair-wise":[158],"point-wise":[160],"reward-model":[161],"evaluation.":[162],"Across":[163],"modern":[164],"generators,":[166],"our":[167,193],"results":[168],"expose":[169],"persistent":[171],"gap":[172],"between":[173],"plausibility":[175],"reasoning:":[178],"look":[181],"convincing":[182],"failing":[184],"dynamics,":[185],"causality,":[186],"or":[187],"information":[188],"preservation.":[189],"will":[191],"release":[192],"benchmarks":[194],"toolkit":[197],"support":[199],"research":[201],"on":[202],"genuinely":[203],"world-aware":[204],"at":[207],"https://github.com/UniX-AI-Lab/WorldReasonBench/.":[208]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2026-05-13T00:00:00"}
