{"id":"https://openalex.org/W7160852052","doi":"https://doi.org/10.48550/arxiv.2605.07288","title":"Sword: Style-Robust World Models as Simulators via Dynamic Latent Bootstrapping for VLA Policy Post-Training","display_name":"Sword: Style-Robust World Models as Simulators via Dynamic Latent Bootstrapping for VLA Policy Post-Training","publication_year":2026,"publication_date":"2026-05-08","ids":{"openalex":"https://openalex.org/W7160852052","doi":"https://doi.org/10.48550/arxiv.2605.07288"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2605.07288","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2605.07288","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2605.07288","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5135894860","display_name":"Jiaxuan Gao","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Gao, Jiaxuan","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5125213178","display_name":"Yongjian Guo","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Guo, Yongjian","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5125195887","display_name":"Zhong Guan","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Guan, Zhong","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5135858997","display_name":"Wen Huang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Huang, Wen","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5011383481","display_name":"Wanlun Ma","orcid":"https://orcid.org/0000-0002-6305-1740"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Ma, Wanlun","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5135890989","display_name":"Xi Xiao","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Xiao, Xi","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5102937576","display_name":"Junwu Xiong","orcid":"https://orcid.org/0009-0008-2028-510X"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Xiong, Junwu","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5135830538","display_name":"Sheng Wen","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wen, Sheng","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":8,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.6757000088691711,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.6757000088691711,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10775","display_name":"Generative Adversarial Networks and Image Synthesis","score":0.20819999277591705,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11307","display_name":"Domain Adaptation and Few-Shot Learning","score":0.023900000378489494,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/inference","display_name":"Inference","score":0.5752999782562256},{"id":"https://openalex.org/keywords/consistency","display_name":"Consistency (knowledge bases)","score":0.5091999769210815},{"id":"https://openalex.org/keywords/bootstrapping","display_name":"Bootstrapping (finance)","score":0.44510000944137573},{"id":"https://openalex.org/keywords/benchmark","display_name":"Benchmark (surveying)","score":0.4350999891757965},{"id":"https://openalex.org/keywords/reliability","display_name":"Reliability (semiconductor)","score":0.4223000109195709},{"id":"https://openalex.org/keywords/quality","display_name":"Quality (philosophy)","score":0.4081999957561493},{"id":"https://openalex.org/keywords/fidelity","display_name":"Fidelity","score":0.4074000120162964},{"id":"https://openalex.org/keywords/real-world-data","display_name":"Real world data","score":0.40119999647140503}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7333999872207642},{"id":"https://openalex.org/C2776214188","wikidata":"https://www.wikidata.org/wiki/Q408386","display_name":"Inference","level":2,"score":0.5752999782562256},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.5432999730110168},{"id":"https://openalex.org/C2776436953","wikidata":"https://www.wikidata.org/wiki/Q5163215","display_name":"Consistency (knowledge bases)","level":2,"score":0.5091999769210815},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.47839999198913574},{"id":"https://openalex.org/C207609745","wikidata":"https://www.wikidata.org/wiki/Q4944086","display_name":"Bootstrapping (finance)","level":2,"score":0.44510000944137573},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.4350999891757965},{"id":"https://openalex.org/C43214815","wikidata":"https://www.wikidata.org/wiki/Q7310987","display_name":"Reliability (semiconductor)","level":3,"score":0.4223000109195709},{"id":"https://openalex.org/C2779530757","wikidata":"https://www.wikidata.org/wiki/Q1207505","display_name":"Quality (philosophy)","level":2,"score":0.4081999957561493},{"id":"https://openalex.org/C2776459999","wikidata":"https://www.wikidata.org/wiki/Q2119376","display_name":"Fidelity","level":2,"score":0.4074000120162964},{"id":"https://openalex.org/C3020493868","wikidata":"https://www.wikidata.org/wiki/Q55631277","display_name":"Real world data","level":2,"score":0.40119999647140503},{"id":"https://openalex.org/C177148314","wikidata":"https://www.wikidata.org/wiki/Q170084","display_name":"Generalization","level":2,"score":0.3887999951839447},{"id":"https://openalex.org/C12725497","wikidata":"https://www.wikidata.org/wiki/Q810247","display_name":"Baseline (sea)","level":2,"score":0.3682999908924103},{"id":"https://openalex.org/C151201525","wikidata":"https://www.wikidata.org/wiki/Q177239","display_name":"Limit (mathematics)","level":2,"score":0.321399986743927},{"id":"https://openalex.org/C167966045","wikidata":"https://www.wikidata.org/wiki/Q5532625","display_name":"Generative model","level":3,"score":0.32120001316070557},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.30720001459121704},{"id":"https://openalex.org/C160234255","wikidata":"https://www.wikidata.org/wiki/Q812535","display_name":"Bayesian inference","level":3,"score":0.302700012922287},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.27410000562667847},{"id":"https://openalex.org/C39890363","wikidata":"https://www.wikidata.org/wiki/Q36108","display_name":"Generative grammar","level":2,"score":0.2732999920845032},{"id":"https://openalex.org/C73602740","wikidata":"https://www.wikidata.org/wiki/Q7795822","display_name":"Thompson sampling","level":3,"score":0.2669999897480011},{"id":"https://openalex.org/C177212765","wikidata":"https://www.wikidata.org/wiki/Q627335","display_name":"Workflow","level":2,"score":0.2667999863624573},{"id":"https://openalex.org/C22019652","wikidata":"https://www.wikidata.org/wiki/Q331309","display_name":"Overfitting","level":3,"score":0.2596000134944916}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2605.07288","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2605.07288","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2605.07288","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2605.07288","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[{"score":0.5085235834121704,"display_name":"No poverty","id":"https://metadata.un.org/sdg/1"}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"The":[0],"integration":[1],"of":[2,94,103,131,175,185],"Vision-Language-Action":[3],"(VLA)":[4],"models":[5,58],"with":[6],"World":[7,18,43,104,117],"Models":[8,19,44,105],"has":[9],"gained":[10],"increasing":[11],"attention.":[12],"One":[13],"representative":[14],"approach":[15],"treats":[16],"learned":[17],"as":[20,32,38,106],"generative":[21],"simulators,":[22],"enabling":[23],"policy":[24],"optimization":[25],"entirely":[26],"within":[27],"\"imagination.\"":[28],"However,":[29],"when":[30],"deployed":[31],"simulators":[33],"for":[34,188],"specific":[35],"environments":[36,133],"such":[37],"the":[39,90,101,128,161,170,182],"LIBERO":[40,162],"benchmark,":[41],"existing":[42],"often":[45],"suffer":[46],"from":[47,134],"poor":[48],"generalization":[49],"and":[50,70,92,151,181],"long-horizon":[51,85],"error":[52,86],"accumulation.":[53],"During":[54],"closed-loop":[55],"rollouts,":[56],"these":[57,110],"are":[59],"highly":[60],"sensitive":[61],"to":[62,79,126],"initial-state":[63],"perturbations;":[64],"minor":[65],"changes":[66],"in":[67,173],"color,":[68],"illumination,":[69],"other":[71],"visual":[72,129],"factors":[73],"can":[74],"trigger":[75],"cascading":[76],"hallucinations,":[77],"leading":[78],"severe":[80],"blurriness":[81],"or":[82],"overexposure.":[83],"Moreover,":[84],"accumulation":[87],"further":[88,141],"degrades":[89],"quality":[91],"fidelity":[93],"predicted":[95],"future":[96],"states.":[97],"These":[98],"issues":[99],"limit":[100],"reliability":[102],"simulators.":[107],"To":[108],"mitigate":[109],"problems,":[111],"we":[112],"propose":[113,142],"Sword,":[114],"a":[115],"robust":[116],"Model":[118],"framework.":[119],"Our":[120],"method":[121,167],"introduces":[122],"Structure-Guided":[123],"Style":[124],"Augmentation":[125],"disentangle":[127],"textures":[130],"interactive":[132],"task-relevant":[135],"dynamics,":[136],"thereby":[137],"improving":[138],"generalization.":[139],"We":[140],"Dynamic":[143],"Latent":[144],"Bootstrapping,":[145],"which":[146],"maintains":[147],"consistency":[148],"between":[149],"training":[150],"inference":[152],"while":[153],"keeping":[154],"memory":[155],"consumption":[156],"low.":[157],"Extensive":[158],"experiments":[159],"on":[160],"benchmark":[163],"show":[164],"that":[165],"our":[166],"significantly":[168],"outperforms":[169],"baseline":[171],"WoVR":[172],"terms":[174],"generalization,":[176],"generation":[177],"quality,":[178],"robustness,":[179],"fidelity,":[180],"success":[183],"rate":[184],"reinforcement-learning":[186],"post-training":[187],"VLA":[189],"models.":[190]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2026-05-12T00:00:00"}
