{"id":"https://openalex.org/W7140277059","doi":"https://doi.org/10.48550/arxiv.2603.23497","title":"WildWorld: A Large-Scale Dataset for Dynamic World Modeling with Actions and Explicit State toward Generative ARPG","display_name":"WildWorld: A Large-Scale Dataset for Dynamic World Modeling with Actions and Explicit State toward Generative ARPG","publication_year":2026,"publication_date":"2026-03-24","ids":{"openalex":"https://openalex.org/W7140277059","doi":"https://doi.org/10.48550/arxiv.2603.23497"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2603.23497","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.23497","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2603.23497","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5130608450","display_name":"Zhen Li","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Li, Zhen","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5102629811","display_name":"Zian Meng","orcid":"https://orcid.org/0009-0001-7965-0668"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Meng, Zian","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5130553299","display_name":"Shuwei Shi","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Shi, Shuwei","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5036259025","display_name":"Wenshuo Peng","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Peng, Wenshuo","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5130579840","display_name":"Yuwei Wu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wu, Yuwei","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5130598305","display_name":"Bo Zheng","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zheng, Bo","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5130602090","display_name":"Chuanhao Li","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Li, Chuanhao","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5130560297","display_name":"Kaipeng Zhang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhang, Kaipeng","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":8,"corresponding_author_ids":["https://openalex.org/A5130608450"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10775","display_name":"Generative Adversarial Networks and Image Synthesis","score":0.4677000045776367,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10775","display_name":"Generative Adversarial Networks and Image Synthesis","score":0.4677000045776367,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12290","display_name":"Human Motion and Animation","score":0.16329999268054962,"subfield":{"id":"https://openalex.org/subfields/2207","display_name":"Control and Systems Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10812","display_name":"Human Pose and Action Recognition","score":0.09120000153779984,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/action","display_name":"Action (physics)","score":0.6905999779701233},{"id":"https://openalex.org/keywords/state","display_name":"State (computer science)","score":0.5899999737739563},{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.5212000012397766},{"id":"https://openalex.org/keywords/dynamics","display_name":"Dynamics (music)","score":0.5004000067710876},{"id":"https://openalex.org/keywords/generative-grammar","display_name":"Generative grammar","score":0.5001000165939331},{"id":"https://openalex.org/keywords/generative-model","display_name":"Generative model","score":0.4562000036239624}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.738099992275238},{"id":"https://openalex.org/C2780791683","wikidata":"https://www.wikidata.org/wiki/Q846785","display_name":"Action (physics)","level":2,"score":0.6905999779701233},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6050999760627747},{"id":"https://openalex.org/C48103436","wikidata":"https://www.wikidata.org/wiki/Q599031","display_name":"State (computer science)","level":2,"score":0.5899999737739563},{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.5212000012397766},{"id":"https://openalex.org/C145912823","wikidata":"https://www.wikidata.org/wiki/Q113558","display_name":"Dynamics (music)","level":2,"score":0.5004000067710876},{"id":"https://openalex.org/C39890363","wikidata":"https://www.wikidata.org/wiki/Q36108","display_name":"Generative grammar","level":2,"score":0.5001000165939331},{"id":"https://openalex.org/C167966045","wikidata":"https://www.wikidata.org/wiki/Q5532625","display_name":"Generative model","level":3,"score":0.4562000036239624},{"id":"https://openalex.org/C104114177","wikidata":"https://www.wikidata.org/wiki/Q79782","display_name":"Motion (physics)","level":2,"score":0.39489999413490295},{"id":"https://openalex.org/C184337299","wikidata":"https://www.wikidata.org/wiki/Q1437428","display_name":"Semantics (computer science)","level":2,"score":0.3416000008583069},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.31459999084472656},{"id":"https://openalex.org/C3018412434","wikidata":"https://www.wikidata.org/wiki/Q7889","display_name":"Video game","level":2,"score":0.3003999888896942},{"id":"https://openalex.org/C2987834672","wikidata":"https://www.wikidata.org/wiki/Q4677630","display_name":"Action recognition","level":3,"score":0.3001999855041504},{"id":"https://openalex.org/C126042441","wikidata":"https://www.wikidata.org/wiki/Q1324888","display_name":"Frame (networking)","level":2,"score":0.29330000281333923},{"id":"https://openalex.org/C113336015","wikidata":"https://www.wikidata.org/wiki/Q574010","display_name":"Complete information","level":2,"score":0.28450000286102295},{"id":"https://openalex.org/C107457646","wikidata":"https://www.wikidata.org/wiki/Q207434","display_name":"Human\u2013computer interaction","level":1,"score":0.2793999910354614}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2603.23497","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.23497","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2603.23497","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.23497","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[{"score":0.5700711011886597,"display_name":"Peace, Justice and strong institutions","id":"https://metadata.un.org/sdg/16"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Dynamical":[0],"systems":[1],"theory":[2],"and":[3,47,52,86,127,136,151,164,177],"reinforcement":[4],"learning":[5],"view":[6],"world":[7,26,84,102,147],"evolution":[8,89],"as":[9],"latent-state":[10],"dynamics":[11,33,85],"driven":[12],"by":[13,63],"actions,":[14,132],"with":[15,73,105,140],"visual":[16,58],"observations":[17,59],"providing":[18],"partial":[19],"information":[20],"about":[21],"the":[22,41,183],"state.":[23],"Recent":[24],"video":[25,187],"models":[27,80,160],"attempt":[28],"to":[29,57,81,158],"learn":[30,82],"this":[31,94],"action-conditioned":[32,101],"from":[34,111],"data.":[35],"However,":[36],"existing":[37],"datasets":[38],"rarely":[39],"match":[40],"requirement:":[42],"they":[43],"typically":[44],"lack":[45],"diverse":[46],"semantically":[48,174],"meaningful":[49],"action":[50,115],"spaces,":[51],"actions":[53,69,176],"are":[54,70],"directly":[55],"tied":[56],"rather":[60],"than":[61,130],"mediated":[62],"underlying":[64],"states.":[65],"As":[66],"a":[67,99,112],"result,":[68],"often":[71],"entangled":[72],"pixel-level":[74],"changes,":[75],"making":[76],"it":[77],"difficult":[78],"for":[79,185],"structured":[83],"maintain":[87],"consistent":[88],"over":[90,123],"long":[91],"horizons.":[92],"In":[93],"paper,":[95],"we":[96],"propose":[97],"WildWorld,":[98],"large-scale":[100],"modeling":[103,173],"dataset":[104],"explicit":[106],"state":[107,180],"annotations,":[108],"automatically":[109],"collected":[110],"photorealistic":[113],"AAA":[114],"role-playing":[116],"game":[117],"(Monster":[118],"Hunter:":[119],"Wilds).":[120],"WildWorld":[121],"contains":[122],"108":[124],"million":[125],"frames":[126],"features":[128],"more":[129],"450":[131],"including":[133],"movement,":[134],"attacks,":[135],"skill":[137],"casting,":[138],"together":[139],"synchronized":[141],"per-frame":[142],"annotations":[143],"of":[144],"character":[145],"skeletons,":[146],"states,":[148],"camera":[149],"poses,":[150],"depth":[152],"maps.":[153],"We":[154],"further":[155],"derive":[156],"WildBench":[157],"evaluate":[159],"through":[161],"Action":[162],"Following":[163],"State":[165],"Alignment.":[166],"Extensive":[167],"experiments":[168],"reveal":[169],"persistent":[170],"challenges":[171],"in":[172],"rich":[175],"maintaining":[178],"long-horizon":[179],"consistency,":[181],"highlighting":[182],"need":[184],"state-aware":[186],"generation.":[188],"The":[189],"project":[190],"page":[191],"is":[192],"https://shandaai.github.io/wildworld-project/.":[193]},"counts_by_year":[],"updated_date":"2026-03-26T06:10:45.909354","created_date":"2026-03-26T00:00:00"}
