{"id":"https://openalex.org/W7155214029","doi":"https://doi.org/10.48550/arxiv.2604.19710","title":"SpanVLA: Efficient Action Bridging and Learning from Negative-Recovery Samples for Vision-Language-Action Model","display_name":"SpanVLA: Efficient Action Bridging and Learning from Negative-Recovery Samples for Vision-Language-Action Model","publication_year":2026,"publication_date":"2026-04-21","ids":{"openalex":"https://openalex.org/W7155214029","doi":"https://doi.org/10.48550/arxiv.2604.19710"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2604.19710","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.19710","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2604.19710","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5134354867","display_name":"Zewei Zhou","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhou, Zewei","raw_affiliation_strings":["Tony"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Tony","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5134282439","display_name":"Ruining Yang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yang, Ruining","raw_affiliation_strings":["Tony"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Tony","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5134312647","display_name":"Xuewei","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Xuewei","raw_affiliation_strings":["Tony"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Tony","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5134254062","display_name":"Qi","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Qi","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5037416485","display_name":"Yiluan Guo","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Guo, Yiluan","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100739426","display_name":"Xiaotong Chen","orcid":"https://orcid.org/0000-0002-4964-5286"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Chen, Sherry X.","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5134288146","display_name":"Tao Feng","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Feng, Tao","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5015099378","display_name":"Kateryna Pistunova","orcid":"https://orcid.org/0000-0002-3170-1657"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Pistunova, Kateryna","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5108335616","display_name":"Yishan Shen","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Shen, Yishan","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5134255496","display_name":"Lili Su","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Su, Lili","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5134254652","display_name":"Jiaqi Ma","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Ma, Jiaqi","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":11,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9675999879837036,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9675999879837036,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.006200000178068876,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11307","display_name":"Domain Adaptation and Few-Shot Learning","score":0.0038999998942017555,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/inference","display_name":"Inference","score":0.6658999919891357},{"id":"https://openalex.org/keywords/robustness","display_name":"Robustness (evolution)","score":0.63919997215271},{"id":"https://openalex.org/keywords/leverage","display_name":"Leverage (statistics)","score":0.6324999928474426},{"id":"https://openalex.org/keywords/bridging","display_name":"Bridging (networking)","score":0.4465000033378601},{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.3474999964237213},{"id":"https://openalex.org/keywords/automated-planning-and-scheduling","display_name":"Automated planning and scheduling","score":0.337799996137619},{"id":"https://openalex.org/keywords/action","display_name":"Action (physics)","score":0.31150001287460327},{"id":"https://openalex.org/keywords/structured-prediction","display_name":"Structured prediction","score":0.30390000343322754}],"concepts":[{"id":"https://openalex.org/C2776214188","wikidata":"https://www.wikidata.org/wiki/Q408386","display_name":"Inference","level":2,"score":0.6658999919891357},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6542999744415283},{"id":"https://openalex.org/C63479239","wikidata":"https://www.wikidata.org/wiki/Q7353546","display_name":"Robustness (evolution)","level":3,"score":0.63919997215271},{"id":"https://openalex.org/C153083717","wikidata":"https://www.wikidata.org/wiki/Q6535263","display_name":"Leverage (statistics)","level":2,"score":0.6324999928474426},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6294999718666077},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.5546000003814697},{"id":"https://openalex.org/C174348530","wikidata":"https://www.wikidata.org/wiki/Q188635","display_name":"Bridging (networking)","level":2,"score":0.4465000033378601},{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.3474999964237213},{"id":"https://openalex.org/C114073186","wikidata":"https://www.wikidata.org/wiki/Q2631895","display_name":"Automated planning and scheduling","level":2,"score":0.337799996137619},{"id":"https://openalex.org/C2780791683","wikidata":"https://www.wikidata.org/wiki/Q846785","display_name":"Action (physics)","level":2,"score":0.31150001287460327},{"id":"https://openalex.org/C22367795","wikidata":"https://www.wikidata.org/wiki/Q7625208","display_name":"Structured prediction","level":2,"score":0.30390000343322754},{"id":"https://openalex.org/C159877910","wikidata":"https://www.wikidata.org/wiki/Q2202883","display_name":"Autoregressive model","level":2,"score":0.29269999265670776},{"id":"https://openalex.org/C13687954","wikidata":"https://www.wikidata.org/wiki/Q4826847","display_name":"Autonomous agent","level":2,"score":0.2752000093460083},{"id":"https://openalex.org/C83725634","wikidata":"https://www.wikidata.org/wiki/Q7268699","display_name":"Qualitative reasoning","level":2,"score":0.26249998807907104},{"id":"https://openalex.org/C2780210234","wikidata":"https://www.wikidata.org/wiki/Q422638","display_name":"Action plan","level":2,"score":0.2606000006198883},{"id":"https://openalex.org/C90509273","wikidata":"https://www.wikidata.org/wiki/Q11012","display_name":"Robot","level":2,"score":0.25870001316070557},{"id":"https://openalex.org/C67186912","wikidata":"https://www.wikidata.org/wiki/Q367664","display_name":"Data modeling","level":2,"score":0.25850000977516174},{"id":"https://openalex.org/C51632099","wikidata":"https://www.wikidata.org/wiki/Q3985153","display_name":"Training set","level":2,"score":0.2531999945640564},{"id":"https://openalex.org/C195344581","wikidata":"https://www.wikidata.org/wiki/Q2555318","display_name":"Automated reasoning","level":2,"score":0.251800000667572}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2604.19710","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.19710","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2604.19710","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.19710","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Vision-Language-Action":[0],"(VLA)":[1],"models":[2,23],"offer":[3],"a":[4,48,59,84,111,147],"promising":[5],"autonomous":[6,51],"driving":[7,52,126,150],"paradigm":[8],"for":[9],"leveraging":[10],"world":[11],"knowledge":[12],"and":[13,38,58,73,103,139,158,167,188],"reasoning":[14,57,74,151],"capabilities,":[15],"especially":[16],"in":[17,30],"long-tail":[18],"scenarios.":[19],"However,":[20],"existing":[21],"VLA":[22,118],"often":[24],"struggle":[25],"with":[26],"the":[27,71,101,106,117,135,164,170,174,178,185],"high":[28],"latency":[29],"action":[31,61],"generation":[32,36],"using":[33,83],"an":[34,55,66],"autoregressive":[35,56],"framework":[37],"exhibit":[39],"limited":[40],"robustness.":[41],"In":[42],"this":[43],"paper,":[44],"we":[45,109],"propose":[46,110],"SpanVLA,":[47],"novel":[49],"end-to-end":[50],"framework,":[53],"integrating":[54],"flow-matching":[60,85],"expert.":[62],"First,":[63],"SpanVLA":[64,107,175],"introduces":[65],"efficient":[67],"bridge":[68],"to":[69,78,98,115,122,130,133],"leverage":[70],"vision":[72],"guidance":[75],"of":[76,105,173,190],"VLM":[77],"efficiently":[79],"plan":[80],"future":[81],"trajectories":[82],"policy":[86],"conditioned":[87],"on":[88,154,163],"historical":[89],"trajectory":[90],"initialization,":[91],"which":[92],"significantly":[93],"reduces":[94],"inference":[95],"time.":[96],"Second,":[97],"further":[99,144],"improve":[100],"performance":[102,172,187],"robustness":[104,189],"model,":[108],"GRPO-based":[112],"post-training":[113],"method":[114],"enable":[116],"model":[119],"not":[120],"only":[121],"learn":[123,131,140],"from":[124],"positive":[125],"samples":[127],"but":[128],"also":[129],"how":[132],"avoid":[134],"typical":[136],"negative":[137],"behaviors":[138],"recovery":[141],"behaviors.":[142],"We":[143],"introduce":[145],"mReasoning,":[146],"new":[148],"real-world":[149],"dataset,":[152],"focusing":[153],"complex,":[155],"reasoning-demanding":[156],"scenarios":[157,183],"negative-recovery":[159],"samples.":[160],"Extensive":[161],"experiments":[162],"NAVSIM":[165],"(v1":[166],"v2)":[168],"demonstrate":[169],"competitive":[171],"model.":[176,192],"Additionally,":[177],"qualitative":[179],"results":[180],"across":[181],"diverse":[182],"highlight":[184],"planning":[186],"our":[191]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2026-04-23T00:00:00"}
