{"id":"https://openalex.org/W7134856072","doi":"https://doi.org/10.48550/arxiv.2603.08519","title":"AtomVLA: Scalable Post-Training for Robotic Manipulation via Predictive Latent World Models","display_name":"AtomVLA: Scalable Post-Training for Robotic Manipulation via Predictive Latent World Models","publication_year":2026,"publication_date":"2026-03-09","ids":{"openalex":"https://openalex.org/W7134856072","doi":"https://doi.org/10.48550/arxiv.2603.08519"},"language":null,"primary_location":{"id":"pmh:doi:10.48550/arxiv.2603.08519","is_oa":true,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S4406922384","display_name":"Open MIND","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"publisher-specific-oa","license_id":"https://openalex.org/licenses/publisher-specific-oa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":null,"any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5127371310","display_name":"Xiaoquan Sun","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Sun, Xiaoquan","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5110259118","display_name":"Zetian Xu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Xu, Zetian","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100753186","display_name":"Chen Cao","orcid":"https://orcid.org/0000-0002-7780-2787"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Cao, Chen","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5114444485","display_name":"Ziyi Liu","orcid":"https://orcid.org/0009-0009-9956-5493"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Liu, Zonghe","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5004374151","display_name":"Yihan Sun","orcid":"https://orcid.org/0000-0002-3212-0934"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Sun, Yihan","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5110268407","display_name":"Jin\u2019an Pang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Pang, Jingrui","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5128663341","display_name":"Ruijian Zhang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhang, Ruijian","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5128647557","display_name":"Zhen Yang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yang, Zhen","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5128667153","display_name":"Kang Pang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Pang, Kang","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5128630896","display_name":"Dingxin He","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"He, Dingxin","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5110921988","display_name":"Mingqi Yuan","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yuan, Mingqi","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5128653809","display_name":"Jiayu Chen","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Chen, Jiayu","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":12,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10653","display_name":"Robot Manipulation and Learning","score":0.5950000286102295,"subfield":{"id":"https://openalex.org/subfields/2207","display_name":"Control and Systems Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10653","display_name":"Robot Manipulation and Learning","score":0.5950000286102295,"subfield":{"id":"https://openalex.org/subfields/2207","display_name":"Control and Systems Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.16380000114440918,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.09080000221729279,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/scalability","display_name":"Scalability","score":0.7700999975204468},{"id":"https://openalex.org/keywords/bridging","display_name":"Bridging (networking)","score":0.7287999987602234},{"id":"https://openalex.org/keywords/robustness","display_name":"Robustness (evolution)","score":0.6944000124931335},{"id":"https://openalex.org/keywords/benchmark","display_name":"Benchmark (surveying)","score":0.5390999913215637},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.47600001096725464},{"id":"https://openalex.org/keywords/baseline","display_name":"Baseline (sea)","score":0.46950000524520874},{"id":"https://openalex.org/keywords/language-model","display_name":"Language model","score":0.4237000048160553}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7734000086784363},{"id":"https://openalex.org/C48044578","wikidata":"https://www.wikidata.org/wiki/Q727490","display_name":"Scalability","level":2,"score":0.7700999975204468},{"id":"https://openalex.org/C174348530","wikidata":"https://www.wikidata.org/wiki/Q188635","display_name":"Bridging (networking)","level":2,"score":0.7287999987602234},{"id":"https://openalex.org/C63479239","wikidata":"https://www.wikidata.org/wiki/Q7353546","display_name":"Robustness (evolution)","level":3,"score":0.6944000124931335},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.5390999913215637},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.5383999943733215},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5253999829292297},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.47600001096725464},{"id":"https://openalex.org/C12725497","wikidata":"https://www.wikidata.org/wiki/Q810247","display_name":"Baseline (sea)","level":2,"score":0.46950000524520874},{"id":"https://openalex.org/C137293760","wikidata":"https://www.wikidata.org/wiki/Q3621696","display_name":"Language model","level":2,"score":0.4237000048160553},{"id":"https://openalex.org/C168167062","wikidata":"https://www.wikidata.org/wiki/Q1117970","display_name":"Component (thermodynamics)","level":2,"score":0.4198000133037567},{"id":"https://openalex.org/C2776760102","wikidata":"https://www.wikidata.org/wiki/Q5139990","display_name":"Code (set theory)","level":3,"score":0.4016999900341034},{"id":"https://openalex.org/C175154964","wikidata":"https://www.wikidata.org/wiki/Q380077","display_name":"Task analysis","level":3,"score":0.34769999980926514},{"id":"https://openalex.org/C36503486","wikidata":"https://www.wikidata.org/wiki/Q11235244","display_name":"Domain (mathematical analysis)","level":2,"score":0.2874999940395355},{"id":"https://openalex.org/C43126263","wikidata":"https://www.wikidata.org/wiki/Q128751","display_name":"Source code","level":2,"score":0.2831000089645386},{"id":"https://openalex.org/C45804977","wikidata":"https://www.wikidata.org/wiki/Q7239673","display_name":"Predictive modelling","level":2,"score":0.28060001134872437},{"id":"https://openalex.org/C113775141","wikidata":"https://www.wikidata.org/wiki/Q428691","display_name":"Computer engineering","level":1,"score":0.2791000008583069},{"id":"https://openalex.org/C199519371","wikidata":"https://www.wikidata.org/wiki/Q942695","display_name":"Source lines of code","level":3,"score":0.25609999895095825}],"mesh":[],"locations_count":2,"locations":[{"id":"pmh:doi:10.48550/arxiv.2603.08519","is_oa":true,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S4406922384","display_name":"Open MIND","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"publisher-specific-oa","license_id":"https://openalex.org/licenses/publisher-specific-oa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Article"},{"id":"doi:10.48550/arxiv.2603.08519","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.08519","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"pmh:doi:10.48550/arxiv.2603.08519","is_oa":true,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S4406922384","display_name":"Open MIND","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"publisher-specific-oa","license_id":"https://openalex.org/licenses/publisher-specific-oa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Vision-Language-Action":[0],"(VLA)":[1],"models":[2,18,50,74],"demonstrate":[3],"remarkable":[4],"potential":[5],"for":[6,29,72,236],"generalizable":[7],"robotic":[8],"manipulation.":[9],"The":[10],"execution":[11],"of":[12,183,233],"complex":[13],"multi-step":[14],"behaviors":[15],"in":[16,60,127,198],"VLA":[17,73,87],"can":[19],"be":[20,224],"improved":[21],"by":[22],"robust":[23],"instruction":[24,46,66],"grounding,":[25],"a":[26,91,99,114],"critical":[27],"component":[28],"effective":[30],"control.":[31],"However,":[32],"current":[33],"paradigms":[34],"predominantly":[35],"rely":[36],"on":[37,157,185,191],"coarse,":[38],"high-level":[39,105],"task":[40],"instructions":[41],"during":[42],"supervised":[43],"fine-tuning.":[44],"This":[45,111],"grounding":[47],"gap":[48,67],"leaves":[49],"without":[51,149],"explicit":[52],"intermediate":[53],"guidance,":[54],"leading":[55],"to":[56,103,119,226],"severe":[57],"compounding":[58],"errors":[59],"long-horizon":[61,137,216],"tasks.":[62,217],"Therefore,":[63],"bridging":[64],"this":[65,79,140,234],"and":[68,189,221],"providing":[69],"scalable":[70,92],"post-training":[71,94],"is":[75],"urgent.":[76],"To":[77],"tackle":[78],"problem,":[80],"we":[81],"propose":[82],"\\method,":[83],"the":[84,128,150,186,192,199,203,227,231],"first":[85],"subtask-aware":[86],"framework":[88,97],"integrated":[89],"with":[90,154],"offline":[93],"pipeline.":[95],"Our":[96],"leverages":[98],"large":[100],"language":[101],"model":[102,118],"decompose":[104],"demonstrations":[106],"into":[107],"fine-grained":[108],"atomic":[109],"subtasks.":[110],"approach":[112,141],"utilizes":[113],"pretrained":[115],"predictive":[116],"world":[117,201],"score":[120],"candidate":[121],"action":[122],"chunks":[123],"against":[124,173],"subtask":[125],"goals":[126],"latent":[129],"space,":[130],"mitigating":[131],"error":[132],"accumulation":[133],"while":[134],"significantly":[135],"improving":[136],"robustness.":[138],"Furthermore,":[139],"enables":[142],"highly":[143],"efficient":[144],"Group":[145],"Relative":[146],"Policy":[147],"Optimization":[148],"prohibitive":[151],"expenses":[152],"associated":[153],"online":[155],"rollouts":[156],"physical":[158],"robots.":[159],"Extensive":[160],"simulations":[161],"validate":[162],"that":[163],"our":[164],"AtomVLA":[165],"maintains":[166],"strong":[167],"robustness":[168],"under":[169],"perturbations.":[170],"When":[171],"evaluated":[172],"fundamental":[174],"baseline":[175],"models,":[176],"it":[177],"achieves":[178],"an":[179],"average":[180],"success":[181],"rate":[182],"97.0\\%":[184],"LIBERO":[187],"benchmark":[188],"48.0\\%":[190],"LIBERO-PRO":[193],"benchmark.":[194],"Finally,":[195],"experiments":[196],"conducted":[197],"real":[200],"using":[202],"Galaxea":[204],"R1":[205],"Lite":[206],"platform":[207],"confirm":[208],"its":[209],"broad":[210],"applicability":[211],"across":[212],"diverse":[213],"tasks,":[214],"especially":[215],"All":[218],"datasets,":[219],"checkpoints,":[220],"code":[222],"will":[223],"released":[225],"public":[228],"domain":[229],"following":[230],"acceptance":[232],"work":[235],"future":[237],"research.":[238]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2026-03-11T00:00:00"}
