{"id":"https://openalex.org/W7161707951","doi":"https://doi.org/10.48550/arxiv.2605.18287","title":"StableVLA: Towards Robust Vision-Language-Action Models without Extra Data","display_name":"StableVLA: Towards Robust Vision-Language-Action Models without Extra Data","publication_year":2026,"publication_date":"2026-05-18","ids":{"openalex":"https://openalex.org/W7161707951","doi":"https://doi.org/10.48550/arxiv.2605.18287"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2605.18287","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2605.18287","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Preprint"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2605.18287","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5136494211","display_name":"Yiyang Fu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Fu, Yiyang","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5003668019","display_name":"Chubin Zhang","orcid":"https://orcid.org/0009-0008-6353-2123"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhang, Chubin","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5136484129","display_name":"Shukai Gong","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Gong, Shukai","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5136469805","display_name":"Yufan Deng","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Deng, Yufan","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101154996","display_name":"Kaiwei Sun","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Sun, Kaiwei","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5136467875","display_name":"Qiyang Min","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Min, Qiyang","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5136471387","display_name":"Qibin Hou","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Hou, Qibin","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5136500494","display_name":"Yansong Tang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Tang, Yansong","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5136462317","display_name":"Jianan Wang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wang, Jianan","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5136496091","display_name":"Daquan Zhou","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhou, Daquan","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":0,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9171000123023987,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9171000123023987,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11307","display_name":"Domain Adaptation and Few-Shot Learning","score":0.028599999845027924,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.014499999582767487,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/robustness","display_name":"Robustness (evolution)","score":0.7792999744415283},{"id":"https://openalex.org/keywords/bottleneck","display_name":"Bottleneck","score":0.5353999733924866},{"id":"https://openalex.org/keywords/synthetic-data","display_name":"Synthetic data","score":0.4332999885082245},{"id":"https://openalex.org/keywords/adapter","display_name":"Adapter (computing)","score":0.4253000020980835},{"id":"https://openalex.org/keywords/training-set","display_name":"Training set","score":0.4189999997615814},{"id":"https://openalex.org/keywords/imperfect","display_name":"Imperfect","score":0.34940001368522644},{"id":"https://openalex.org/keywords/visual-inspection","display_name":"Visual inspection","score":0.33880001306533813}],"concepts":[{"id":"https://openalex.org/C63479239","wikidata":"https://www.wikidata.org/wiki/Q7353546","display_name":"Robustness (evolution)","level":3,"score":0.7792999744415283},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6567999720573425},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5479999780654907},{"id":"https://openalex.org/C2780513914","wikidata":"https://www.wikidata.org/wiki/Q18210350","display_name":"Bottleneck","level":2,"score":0.5353999733924866},{"id":"https://openalex.org/C160920958","wikidata":"https://www.wikidata.org/wiki/Q7662746","display_name":"Synthetic data","level":2,"score":0.4332999885082245},{"id":"https://openalex.org/C177284502","wikidata":"https://www.wikidata.org/wiki/Q1005390","display_name":"Adapter (computing)","level":2,"score":0.4253000020980835},{"id":"https://openalex.org/C51632099","wikidata":"https://www.wikidata.org/wiki/Q3985153","display_name":"Training set","level":2,"score":0.4189999997615814},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.38109999895095825},{"id":"https://openalex.org/C2780310539","wikidata":"https://www.wikidata.org/wiki/Q12547192","display_name":"Imperfect","level":2,"score":0.34940001368522644},{"id":"https://openalex.org/C168820333","wikidata":"https://www.wikidata.org/wiki/Q448889","display_name":"Visual inspection","level":2,"score":0.33880001306533813},{"id":"https://openalex.org/C67186912","wikidata":"https://www.wikidata.org/wiki/Q367664","display_name":"Data modeling","level":2,"score":0.3246999979019165},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.3237000107765198},{"id":"https://openalex.org/C3020493868","wikidata":"https://www.wikidata.org/wiki/Q55631277","display_name":"Real world data","level":2,"score":0.3077000081539154},{"id":"https://openalex.org/C153083717","wikidata":"https://www.wikidata.org/wiki/Q6535263","display_name":"Leverage (statistics)","level":2,"score":0.28700000047683716},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.2847999930381775},{"id":"https://openalex.org/C12725497","wikidata":"https://www.wikidata.org/wiki/Q810247","display_name":"Baseline (sea)","level":2,"score":0.2818000018596649},{"id":"https://openalex.org/C2988416141","wikidata":"https://www.wikidata.org/wiki/Q6031139","display_name":"Information loss","level":2,"score":0.28060001134872437},{"id":"https://openalex.org/C99498987","wikidata":"https://www.wikidata.org/wiki/Q2210247","display_name":"Noise (video)","level":3,"score":0.2754000127315521},{"id":"https://openalex.org/C2781170535","wikidata":"https://www.wikidata.org/wiki/Q30587856","display_name":"Noisy data","level":2,"score":0.25859999656677246}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2605.18287","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2605.18287","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"Preprint"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2605.18287","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2605.18287","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Preprint"},"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/16","display_name":"Peace, Justice and strong institutions","score":0.8162533044815063}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"It":[0],"is":[1],"infeasible":[2],"to":[3],"encompass":[4],"all":[5],"possible":[6],"disturbances":[7,57],"within":[8],"the":[9,18,60,80,105,136],"training":[10,61],"dataset.":[11],"This":[12],"raises":[13],"a":[14,40,51,71,126],"critical":[15],"question":[16],"regarding":[17],"robustness":[19,144],"of":[20,110],"Vision-Language-Action":[21],"(VLA)":[22],"models":[23,48],"when":[24,55],"encountering":[25],"unseen":[26],"real-world":[27],"visual":[28,33,56,91,170],"disturbances,":[29],"particularly":[30],"under":[31,165],"imperfect":[32],"conditions.":[34],"In":[35],"this":[36,67],"work,":[37],"we":[38,69],"conduct":[39],"systematic":[41],"study":[42],"based":[43],"on":[44,135,159],"recent":[45],"state-of-the-art":[46,148],"VLA":[47],"and":[49,121,132,162,168],"reveal":[50],"significant":[52],"performance":[53],"drop":[54],"absent":[58],"from":[59,90],"data":[62,97],"are":[63],"introduced.":[64],"To":[65],"mitigate":[66],"issue,":[68],"propose":[70],"lightweight":[72],"adapter":[73],"module":[74],"grounded":[75],"in":[76],"information":[77],"theory,":[78],"termed":[79],"Information":[81],"Bottleneck":[82],"Adapter":[83],"(IB-Adapter),":[84],"which":[85],"selectively":[86],"filters":[87],"potential":[88],"noise":[89],"inputs.":[92],"Without":[93],"requiring":[94],"any":[95],"extra":[96],"or":[98],"augmentation":[99],"strategies,":[100],"IB-Adapter":[101],"consistently":[102],"improves":[103],"over":[104],"baseline":[106],"by":[107],"an":[108],"average":[109],"30%,":[111],"while":[112],"adding":[113],"fewer":[114],"than":[115],"10M":[116],"parameters,":[117],"demonstrating":[118],"notable":[119],"efficiency":[120],"effectiveness.":[122],"Furthermore,":[123],"even":[124],"with":[125,146],"14x":[127],"smaller":[128],"backbone":[129],"(0.5B":[130],"parameters)":[131],"no":[133],"pre-training":[134],"Open":[137],"X-Embodiment":[138],"dataset,":[139],"our":[140,155],"model":[141],"StableVLA":[142],"achieves":[143],"competitive":[145],"7B-scale":[147],"VLAs.":[149],"With":[150],"negligible":[151],"parameter":[152],"overhead":[153],"(&lt;10M),":[154],"approach":[156],"maintains":[157],"accuracy":[158],"long-horizon":[160],"tasks":[161],"surpasses":[163],"OpenPi":[164],"both":[166],"synthetic":[167],"physical":[169],"corruptions.":[171]},"counts_by_year":[],"updated_date":"2026-07-01T06:00:48.157686","created_date":"2026-05-20T00:00:00"}
