{"id":"https://openalex.org/W7128544995","doi":"https://doi.org/10.48550/arxiv.2602.08440","title":"SteerVLA: Steering Vision-Language-Action Models in Long-Tail Driving Scenarios","display_name":"SteerVLA: Steering Vision-Language-Action Models in Long-Tail Driving Scenarios","publication_year":2026,"publication_date":"2026-02-09","ids":{"openalex":"https://openalex.org/W7128544995","doi":"https://doi.org/10.48550/arxiv.2602.08440"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2602.08440","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2602.08440","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2602.08440","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5125490371","display_name":"Tian Gao","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Gao, Tian","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5125476369","display_name":"Celine Tan","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Tan, Celine","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5047300243","display_name":"Catherine Glossop","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Glossop, Catherine","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5125555619","display_name":"Timothy Gao","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Gao, Timothy","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5125539984","display_name":"Jiankai Sun","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Sun, Jiankai","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5063799945","display_name":"Kyle Stachowicz","orcid":"https://orcid.org/0000-0002-9880-7261"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Stachowicz, Kyle","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101786641","display_name":"Shirley Wu","orcid":"https://orcid.org/0000-0003-3526-800X"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wu, Shirley","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5104453432","display_name":"Mees Oier","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Mees, Oier","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":null,"display_name":"Sadigh, Dorsa","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Sadigh, Dorsa","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":null,"display_name":"Levine, Sergey","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Levine, Sergey","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5125574074","display_name":"Chelsea Finn","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Finn, Chelsea","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":11,"corresponding_author_ids":["https://openalex.org/A5125490371"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.6606000065803528,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.6606000065803528,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11099","display_name":"Autonomous Vehicle Technology and Safety","score":0.18979999423027039,"subfield":{"id":"https://openalex.org/subfields/2203","display_name":"Automotive Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.05979999899864197,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/leverage","display_name":"Leverage (statistics)","score":0.8300999999046326},{"id":"https://openalex.org/keywords/key","display_name":"Key (lock)","score":0.5231000185012817},{"id":"https://openalex.org/keywords/control","display_name":"Control (management)","score":0.5087000131607056},{"id":"https://openalex.org/keywords/language-model","display_name":"Language model","score":0.48019999265670776},{"id":"https://openalex.org/keywords/automation","display_name":"Automation","score":0.42179998755455017},{"id":"https://openalex.org/keywords/language-understanding","display_name":"Language understanding","score":0.32820001244544983},{"id":"https://openalex.org/keywords/automated-reasoning","display_name":"Automated reasoning","score":0.30630001425743103}],"concepts":[{"id":"https://openalex.org/C153083717","wikidata":"https://www.wikidata.org/wiki/Q6535263","display_name":"Leverage (statistics)","level":2,"score":0.8300999999046326},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6460000276565552},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.5231000185012817},{"id":"https://openalex.org/C2775924081","wikidata":"https://www.wikidata.org/wiki/Q55608371","display_name":"Control (management)","level":2,"score":0.5087000131607056},{"id":"https://openalex.org/C137293760","wikidata":"https://www.wikidata.org/wiki/Q3621696","display_name":"Language model","level":2,"score":0.48019999265670776},{"id":"https://openalex.org/C107457646","wikidata":"https://www.wikidata.org/wiki/Q207434","display_name":"Human\u2013computer interaction","level":1,"score":0.4309000074863434},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4219000041484833},{"id":"https://openalex.org/C115901376","wikidata":"https://www.wikidata.org/wiki/Q184199","display_name":"Automation","level":2,"score":0.42179998755455017},{"id":"https://openalex.org/C2983448237","wikidata":"https://www.wikidata.org/wiki/Q1078276","display_name":"Language understanding","level":2,"score":0.32820001244544983},{"id":"https://openalex.org/C195344581","wikidata":"https://www.wikidata.org/wiki/Q2555318","display_name":"Automated reasoning","level":2,"score":0.30630001425743103},{"id":"https://openalex.org/C195324797","wikidata":"https://www.wikidata.org/wiki/Q33742","display_name":"Natural language","level":2,"score":0.29269999265670776},{"id":"https://openalex.org/C67186912","wikidata":"https://www.wikidata.org/wiki/Q367664","display_name":"Data modeling","level":2,"score":0.28839999437332153},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.288100004196167},{"id":"https://openalex.org/C87833898","wikidata":"https://www.wikidata.org/wiki/Q1060280","display_name":"Advanced driver assistance systems","level":2,"score":0.28299999237060547},{"id":"https://openalex.org/C18762648","wikidata":"https://www.wikidata.org/wiki/Q42213","display_name":"Work (physics)","level":2,"score":0.28139999508857727},{"id":"https://openalex.org/C113843644","wikidata":"https://www.wikidata.org/wiki/Q901882","display_name":"Interface (matter)","level":4,"score":0.2754000127315521},{"id":"https://openalex.org/C17500928","wikidata":"https://www.wikidata.org/wiki/Q959968","display_name":"Control system","level":2,"score":0.2727999985218048},{"id":"https://openalex.org/C2780689630","wikidata":"https://www.wikidata.org/wiki/Q2081815","display_name":"Driving simulator","level":2,"score":0.26170000433921814},{"id":"https://openalex.org/C146849305","wikidata":"https://www.wikidata.org/wiki/Q370766","display_name":"Ground truth","level":2,"score":0.25209999084472656}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2602.08440","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2602.08440","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2602.08440","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2602.08440","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[{"display_name":"Peace, Justice and strong institutions","id":"https://metadata.un.org/sdg/16","score":0.657283365726471}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"A":[0],"fundamental":[1],"challenge":[2],"in":[3,69,124,181],"autonomous":[4,51],"driving":[5,64,70,95,148,183],"is":[6,101,196],"the":[7,38,55,80,107,115,125,129],"integration":[8],"of":[9,58,83,128],"high-level,":[10],"semantic":[11],"reasoning":[12,81,123,162],"for":[13,20,42,160],"long-tail":[14,191],"events":[15],"with":[16,138,150],"low-level,":[17],"reactive":[18],"control":[19,68,126],"robust":[21,67],"driving.":[22],"While":[23],"large":[24],"vision-language":[25],"models":[26],"(VLMs)":[27],"trained":[28],"on":[29,168,189],"web-scale":[30],"data":[31,149],"offer":[32],"powerful":[33],"common-sense":[34],"reasoning,":[35],"they":[36],"lack":[37],"grounded":[39],"experience":[40],"necessary":[41],"safe":[43],"vehicle":[44,139],"control.":[45],"We":[46,165],"posit":[47],"that":[48,90],"an":[49],"effective":[50,161],"agent":[52],"should":[53],"leverage":[54,142],"world":[56],"knowledge":[57],"VLMs":[59,84],"to":[60,85,98,118,145,157],"guide":[61],"a":[62,92,143,169,190],"steerable":[63],"policy":[65,117],"toward":[66],"scenarios.":[71],"To":[72,132],"this":[73,102],"end,":[74],"we":[75,141,155],"propose":[76],"SteerVLA,":[77],"which":[78,113,154],"leverages":[79],"capabilities":[82],"produce":[86],"fine-grained":[87,134],"language":[88,104,135,152],"instructions":[89],"steer":[91],"vision-language-action":[93],"(VLA)":[94],"policy.":[96,131],"Key":[97],"our":[99],"method":[100],"rich":[103],"interface":[105],"between":[106],"high-level":[108,116],"VLM":[109,144],"and":[110,163,185],"low-level":[111,130],"VLA,":[112],"allows":[114],"more":[119],"effectively":[120],"ground":[121],"its":[122],"outputs":[127],"provide":[133],"supervision":[136],"aligned":[137],"control,":[140],"augment":[146],"existing":[147],"detailed":[151],"annotations,":[153],"find":[156],"be":[158],"essential":[159],"steerability.":[164],"evaluate":[166],"SteerVLA":[167],"challenging":[170],"closed-loop":[171],"benchmark,":[172],"where":[173],"it":[174],"outperforms":[175],"state-of-the-art":[176],"methods":[177],"by":[178,186],"4.77":[179],"points":[180,188],"overall":[182],"score":[184],"8.04":[187],"subset.":[192],"The":[193],"project":[194],"website":[195],"available":[197],"at:":[198],"https://steervla.github.io/.":[199]},"counts_by_year":[],"updated_date":"2026-03-25T23:56:10.502304","created_date":"2026-02-11T00:00:00"}
