{"id":"https://openalex.org/W7137975466","doi":"https://doi.org/10.1609/aaai.v40i22.38925","title":"WorldAgen: Unified State-Action Prediction with Test-Time World Model Training","display_name":"WorldAgen: Unified State-Action Prediction with Test-Time World Model Training","publication_year":2026,"publication_date":"2026-03-14","ids":{"openalex":"https://openalex.org/W7137975466","doi":"https://doi.org/10.1609/aaai.v40i22.38925"},"language":null,"primary_location":{"id":"doi:10.1609/aaai.v40i22.38925","is_oa":true,"landing_page_url":"https://doi.org/10.1609/aaai.v40i22.38925","pdf_url":null,"source":{"id":"https://openalex.org/S4210191458","display_name":"Proceedings of the AAAI Conference on Artificial Intelligence","issn_l":"2159-5399","issn":["2159-5399","2374-3468"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/P4310320058","host_organization_name":"Association for the Advancement of Artificial Intelligence","host_organization_lineage":["https://openalex.org/P4310320058"],"host_organization_lineage_names":["Association for the Advancement of Artificial Intelligence"],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the AAAI Conference on Artificial Intelligence","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"diamond","oa_url":"https://doi.org/10.1609/aaai.v40i22.38925","any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5082688472","display_name":"Chi Wan","orcid":"https://orcid.org/0000-0002-0845-3130"},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Chi Wan","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5046827792","display_name":"Kangrui Wang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Kangrui Wang","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5129679667","display_name":"Yuan Si","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yuan Si","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5046257318","display_name":"Pingyue Zhang","orcid":"https://orcid.org/0000-0002-5884-632X"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Pingyue Zhang","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5129647391","display_name":"Manling Li","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Manling Li","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5082688472"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.2393736,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":"40","issue":"22","first_page":"18584","last_page":"18592"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.8032000064849854,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.8032000064849854,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10775","display_name":"Generative Adversarial Networks and Image Synthesis","score":0.06300000101327896,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11307","display_name":"Domain Adaptation and Few-Shot Learning","score":0.026399999856948853,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/inference","display_name":"Inference","score":0.5827999711036682},{"id":"https://openalex.org/keywords/adaptation","display_name":"Adaptation (eye)","score":0.5482000112533569},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.5404000282287598},{"id":"https://openalex.org/keywords/training-set","display_name":"Training set","score":0.4641999900341034},{"id":"https://openalex.org/keywords/action","display_name":"Action (physics)","score":0.44620001316070557},{"id":"https://openalex.org/keywords/action-recognition","display_name":"Action recognition","score":0.4399999976158142},{"id":"https://openalex.org/keywords/object","display_name":"Object (grammar)","score":0.37220001220703125},{"id":"https://openalex.org/keywords/transformer","display_name":"Transformer","score":0.3393000066280365}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7651000022888184},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.602400004863739},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.5842000246047974},{"id":"https://openalex.org/C2776214188","wikidata":"https://www.wikidata.org/wiki/Q408386","display_name":"Inference","level":2,"score":0.5827999711036682},{"id":"https://openalex.org/C139807058","wikidata":"https://www.wikidata.org/wiki/Q352374","display_name":"Adaptation (eye)","level":2,"score":0.5482000112533569},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.5404000282287598},{"id":"https://openalex.org/C51632099","wikidata":"https://www.wikidata.org/wiki/Q3985153","display_name":"Training set","level":2,"score":0.4641999900341034},{"id":"https://openalex.org/C2780791683","wikidata":"https://www.wikidata.org/wiki/Q846785","display_name":"Action (physics)","level":2,"score":0.44620001316070557},{"id":"https://openalex.org/C2987834672","wikidata":"https://www.wikidata.org/wiki/Q4677630","display_name":"Action recognition","level":3,"score":0.4399999976158142},{"id":"https://openalex.org/C2781238097","wikidata":"https://www.wikidata.org/wiki/Q175026","display_name":"Object (grammar)","level":2,"score":0.37220001220703125},{"id":"https://openalex.org/C66322947","wikidata":"https://www.wikidata.org/wiki/Q11658","display_name":"Transformer","level":3,"score":0.3393000066280365},{"id":"https://openalex.org/C12725497","wikidata":"https://www.wikidata.org/wiki/Q810247","display_name":"Baseline (sea)","level":2,"score":0.3314000070095062},{"id":"https://openalex.org/C175154964","wikidata":"https://www.wikidata.org/wiki/Q380077","display_name":"Task analysis","level":3,"score":0.31929999589920044},{"id":"https://openalex.org/C48103436","wikidata":"https://www.wikidata.org/wiki/Q599031","display_name":"State (computer science)","level":2,"score":0.3084999918937683},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.3027999997138977},{"id":"https://openalex.org/C66024118","wikidata":"https://www.wikidata.org/wiki/Q1122506","display_name":"Computational model","level":2,"score":0.2955000102519989},{"id":"https://openalex.org/C2777211547","wikidata":"https://www.wikidata.org/wiki/Q17141490","display_name":"Training (meteorology)","level":2,"score":0.2904999852180481},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.2718999981880188},{"id":"https://openalex.org/C2779343474","wikidata":"https://www.wikidata.org/wiki/Q3109175","display_name":"Context (archaeology)","level":2,"score":0.27070000767707825},{"id":"https://openalex.org/C45804977","wikidata":"https://www.wikidata.org/wiki/Q7239673","display_name":"Predictive modelling","level":2,"score":0.2596000134944916}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1609/aaai.v40i22.38925","is_oa":true,"landing_page_url":"https://doi.org/10.1609/aaai.v40i22.38925","pdf_url":null,"source":{"id":"https://openalex.org/S4210191458","display_name":"Proceedings of the AAAI Conference on Artificial Intelligence","issn_l":"2159-5399","issn":["2159-5399","2374-3468"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/P4310320058","host_organization_name":"Association for the Advancement of Artificial Intelligence","host_organization_lineage":["https://openalex.org/P4310320058"],"host_organization_lineage_names":["Association for the Advancement of Artificial Intelligence"],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the AAAI Conference on Artificial Intelligence","raw_type":"journal-article"}],"best_oa_location":{"id":"doi:10.1609/aaai.v40i22.38925","is_oa":true,"landing_page_url":"https://doi.org/10.1609/aaai.v40i22.38925","pdf_url":null,"source":{"id":"https://openalex.org/S4210191458","display_name":"Proceedings of the AAAI Conference on Artificial Intelligence","issn_l":"2159-5399","issn":["2159-5399","2374-3468"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/P4310320058","host_organization_name":"Association for the Advancement of Artificial Intelligence","host_organization_lineage":["https://openalex.org/P4310320058"],"host_organization_lineage_names":["Association for the Advancement of Artificial Intelligence"],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the AAAI Conference on Artificial Intelligence","raw_type":"journal-article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"How":[0],"can":[1],"vision-language-action":[2],"(VLA)":[3],"models":[4,48,203],"adapt":[5,84],"to":[6,23,41,51,83,85,137,148,153,178],"new":[7,42,86],"environments":[8,150],"where":[9],"world":[10,18,73,140,202],"dynamics":[11],"shift?":[12],"While":[13],"recent":[14],"research":[15],"has":[16],"combined":[17],"modeling":[19,74],"and":[20,75,109,132,151,162,172],"action":[21,76,156],"prediction":[22,77],"improve":[24],"VLA":[25],"performance,":[26],"existing":[27,194],"methods":[28],"largely":[29],"rely":[30],"on":[31,118,159,185],"pretraining":[32],"in":[33,55,173],"static":[34],"datasets,":[35],"without":[36],"mechanisms":[37],"for":[38],"active":[39],"adaptation":[40,143],"environments.":[43,87],"As":[44],"a":[45,67,90,98,186],"result,":[46],"these":[47],"often":[49],"fail":[50],"generalize":[52],"when":[53],"deployed":[54],"unseen":[56],"scenarios":[57],"with":[58,94,183],"novel":[59],"object":[60],"configurations":[61],"or":[62],"dynamics.":[63],"We":[64],"present":[65],"WorldAgen,":[66],"unified":[68],"framework":[69],"that":[70,101,114,166],"jointly":[71],"learns":[72],"while":[78],"enabling":[79],"test-time":[80],"training":[81],"(TTT)":[82],"WorldAgen":[88,124],"employs":[89],"shared":[91],"Transformer":[92],"backbone":[93],"two":[95],"heads:":[96],"(1)":[97],"world-model":[99],"head":[100,113],"predicts":[102,115],"future":[103],"states":[104],"from":[105],"past":[106],"state-action":[107],"trajectories,":[108],"(2)":[110],"an":[111],"agent-model":[112],"actions":[116],"conditioned":[117],"task":[119],"instructions.":[120],"During":[121],"test":[122],"time,":[123],"samples":[125],"exploratory":[126],"actions,":[127],"collects":[128],"ground-truth":[129],"state":[130],"transitions,":[131],"performs":[133],"lightweight":[134],"TTT":[135,184],"updates":[136],"refine":[138],"its":[139],"model.":[141],"This":[142],"improves":[144],"the":[145,149,160,198],"model's":[146],"understanding":[147],"leads":[152],"more":[154],"accurate":[155],"predictions.":[157],"Experiments":[158],"CALVIN":[161],"LIBERO":[163],"benchmarks":[164],"demonstrate":[165],"our":[167,191],"baseline":[168],"model":[169],"achieves":[170],"comparable,":[171],"some":[174],"cases":[175],"superior,":[176],"performance":[177],"current":[179],"state-of-the-art":[180,195],"approaches.":[181],"Moreover,":[182],"small":[187],"number":[188],"of":[189,200],"samples,":[190],"method":[192],"surpasses":[193],"models,":[196],"highlighting":[197],"effectiveness":[199],"adapting":[201],"at":[204],"inference":[205],"time.":[206]},"counts_by_year":[],"updated_date":"2026-03-18T06:31:55.123368","created_date":"2026-03-18T00:00:00"}
