{"id":"https://openalex.org/W7135076397","doi":"https://doi.org/10.48550/arxiv.2603.10422","title":"World2Act: Latent Action Post-Training from World Model Dynamics","display_name":"World2Act: Latent Action Post-Training from World Model Dynamics","publication_year":2026,"publication_date":"2026-03-11","ids":{"openalex":"https://openalex.org/W7135076397","doi":"https://doi.org/10.48550/arxiv.2603.10422"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2603.10422","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.10422","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2603.10422","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5114205022","display_name":"An Dinh Vuong","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Vuong, An Dinh","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5128819349","display_name":"Tuan Van Vo","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Van Vo, Tuan","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5128775724","display_name":"Abdullah Sohail","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Sohail, Abdullah","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5128901694","display_name":"Haoran Ding","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Ding, Haoran","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5128883909","display_name":"Liang Ma","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Ma, Liang","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5128868290","display_name":"Xiaodan Liang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Liang, Xiaodan","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5077958416","display_name":"Anqing Duan","orcid":"https://orcid.org/0000-0002-9666-018X"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Duan, Anqing","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5128859236","display_name":"Ivan Laptev","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Laptev, Ivan","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5128885656","display_name":"Ian Reid","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Reid, Ian","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":9,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.8860999941825867,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.8860999941825867,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11307","display_name":"Domain Adaptation and Few-Shot Learning","score":0.021900000050663948,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10775","display_name":"Generative Adversarial Networks and Image Synthesis","score":0.021199999377131462,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/robustness","display_name":"Robustness (evolution)","score":0.7245000004768372},{"id":"https://openalex.org/keywords/pipeline","display_name":"Pipeline (software)","score":0.6259999871253967},{"id":"https://openalex.org/keywords/imperfect","display_name":"Imperfect","score":0.5461999773979187},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.520799994468689},{"id":"https://openalex.org/keywords/action","display_name":"Action (physics)","score":0.5067999958992004},{"id":"https://openalex.org/keywords/generalization","display_name":"Generalization","score":0.4982999861240387},{"id":"https://openalex.org/keywords/matching","display_name":"Matching (statistics)","score":0.4300000071525574}],"concepts":[{"id":"https://openalex.org/C63479239","wikidata":"https://www.wikidata.org/wiki/Q7353546","display_name":"Robustness (evolution)","level":3,"score":0.7245000004768372},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7177000045776367},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6741999983787537},{"id":"https://openalex.org/C43521106","wikidata":"https://www.wikidata.org/wiki/Q2165493","display_name":"Pipeline (software)","level":2,"score":0.6259999871253967},{"id":"https://openalex.org/C2780310539","wikidata":"https://www.wikidata.org/wiki/Q12547192","display_name":"Imperfect","level":2,"score":0.5461999773979187},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.520799994468689},{"id":"https://openalex.org/C2780791683","wikidata":"https://www.wikidata.org/wiki/Q846785","display_name":"Action (physics)","level":2,"score":0.5067999958992004},{"id":"https://openalex.org/C177148314","wikidata":"https://www.wikidata.org/wiki/Q170084","display_name":"Generalization","level":2,"score":0.4982999861240387},{"id":"https://openalex.org/C165064840","wikidata":"https://www.wikidata.org/wiki/Q1321061","display_name":"Matching (statistics)","level":2,"score":0.4300000071525574},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.413100004196167},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.35260000824928284},{"id":"https://openalex.org/C2987834672","wikidata":"https://www.wikidata.org/wiki/Q4677630","display_name":"Action recognition","level":3,"score":0.3488999903202057},{"id":"https://openalex.org/C100609095","wikidata":"https://www.wikidata.org/wiki/Q1335050","display_name":"Embodied cognition","level":2,"score":0.33899998664855957},{"id":"https://openalex.org/C175154964","wikidata":"https://www.wikidata.org/wiki/Q380077","display_name":"Task analysis","level":3,"score":0.32760000228881836},{"id":"https://openalex.org/C89600930","wikidata":"https://www.wikidata.org/wiki/Q1423946","display_name":"Segmentation","level":2,"score":0.3140999972820282},{"id":"https://openalex.org/C2776145971","wikidata":"https://www.wikidata.org/wiki/Q30673951","display_name":"Labeled data","level":2,"score":0.28209999203681946},{"id":"https://openalex.org/C166109690","wikidata":"https://www.wikidata.org/wiki/Q4677422","display_name":"Action selection","level":3,"score":0.25540000200271606},{"id":"https://openalex.org/C51632099","wikidata":"https://www.wikidata.org/wiki/Q3985153","display_name":"Training set","level":2,"score":0.2513999938964844}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2603.10422","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.10422","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2603.10422","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.10422","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"World":[0],"Models":[1],"(WMs)":[2],"offer":[3,153],"a":[4,47,70,122,154],"promising":[5],"mechanism":[6],"for":[7],"post-training":[8,27,49,158],"Vision-Language-Action":[9],"(VLA)":[10],"policies":[11,34],"by":[12,40,75,89,135],"providing":[13],"dynamics":[14,54,96,152],"priors":[15],"that":[16,51,149],"improve":[17],"generalization":[18],"under":[19],"task":[20],"and":[21,83,119],"scene":[22],"variation.":[23],"However,":[24],"most":[25],"WM-based":[26,157],"methods":[28],"rely":[29],"on":[30,102,113,121,140],"pixel-space":[31,60,132,161],"supervision,":[32],"making":[33],"sensitive":[35],"to":[36,55,111,137,160],"visual":[37],"artifacts":[38],"introduced":[39],"imperfect":[41],"WM":[42,53,133,151],"rollouts.":[43],"We":[44],"present":[45],"World2Act,":[46],"latent-space":[48],"framework":[50],"transfers":[52],"the":[56,87,146],"VLA":[57,88,127],"policy":[58,91],"without":[59],"supervision.":[61],"World2Act":[62,104],"operates":[63],"in":[64],"two":[65],"stages:":[66],"1)":[67],"it":[68,85,130],"induces":[69],"shared":[71],"video-action":[72],"latent":[73,150],"space":[74],"contrastively":[76],"aligning":[77],"WM-dynamics":[78],"latents":[79],"with":[80],"action":[81,92],"embeddings,":[82],"2)":[84],"post-trains":[86],"guiding":[90],"representations":[93],"toward":[94],"WM-imagined":[95],"rather":[97],"than":[98],"decoded":[99],"pixels.":[100],"Built":[101],"GR00T-N1.6,":[103],"delivers":[105],"absolute":[106],"success-rate":[107],"gains":[108],"of":[109],"up":[110,136],"+2.5%":[112],"simulation":[114],"benchmarks":[115],"(RoboCasa,":[116],"LIBERO,":[117],"Bridge-SIMPLER)":[118],"+6.7%":[120],"real":[123],"robot":[124],"over":[125],"finetuned":[126],"baselines.":[128],"Notably,":[129],"outperforms":[131],"supervision":[134,144],"+6.0%,":[138],"including":[139],"LIBERO":[141],"where":[142],"pixel":[143],"degrades":[145],"baseline,":[147],"suggesting":[148],"more":[155],"stable":[156],"alternative":[159],"transfer.":[162]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2026-03-13T00:00:00"}
