{"id":"https://openalex.org/W7139047685","doi":"https://doi.org/10.48550/arxiv.2603.17300","title":"ReSteer: Quantifying and Refining the Steerability of Multitask Robot Policies","display_name":"ReSteer: Quantifying and Refining the Steerability of Multitask Robot Policies","publication_year":2026,"publication_date":"2026-03-18","ids":{"openalex":"https://openalex.org/W7139047685","doi":"https://doi.org/10.48550/arxiv.2603.17300"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2603.17300","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.17300","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2603.17300","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5129906859","display_name":"Zhenyang Chen","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Chen, Zhenyang","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5053151459","display_name":"Alan Tian","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Tian, Alan","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101910810","display_name":"Liquan Wang","orcid":"https://orcid.org/0000-0002-6470-4889"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wang, Liquan","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5129995831","display_name":"Benjamin Joffe","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Joffe, Benjamin","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5130019847","display_name":"Yingyan Celine Lin","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Lin, Yingyan Celine","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5129866783","display_name":"Yuxiao Chen","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Chen, Yuxiao","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5066892638","display_name":"Siddharth Karamcheti","orcid":"https://orcid.org/0000-0003-2153-2455"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Karamcheti, Siddharth","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5129764377","display_name":"Danfei Xu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Xu, Danfei","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":8,"corresponding_author_ids":["https://openalex.org/A5129906859"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10653","display_name":"Robot Manipulation and Learning","score":0.8601999878883362,"subfield":{"id":"https://openalex.org/subfields/2207","display_name":"Control and Systems Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10653","display_name":"Robot Manipulation and Learning","score":0.8601999878883362,"subfield":{"id":"https://openalex.org/subfields/2207","display_name":"Control and Systems Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.07240000367164612,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10709","display_name":"Social Robot Interaction and HRI","score":0.012799999676644802,"subfield":{"id":"https://openalex.org/subfields/3207","display_name":"Social Psychology"},"field":{"id":"https://openalex.org/fields/32","display_name":"Psychology"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.7192000150680542},{"id":"https://openalex.org/keywords/robot","display_name":"Robot","score":0.6740000247955322},{"id":"https://openalex.org/keywords/trajectory","display_name":"Trajectory","score":0.4772000014781952},{"id":"https://openalex.org/keywords/work","display_name":"Work (physics)","score":0.46230000257492065},{"id":"https://openalex.org/keywords/pipeline","display_name":"Pipeline (software)","score":0.4271000027656555},{"id":"https://openalex.org/keywords/metric","display_name":"Metric (unit)","score":0.3783000111579895},{"id":"https://openalex.org/keywords/measure","display_name":"Measure (data warehouse)","score":0.3716999888420105},{"id":"https://openalex.org/keywords/estimator","display_name":"Estimator","score":0.3582000136375427}],"concepts":[{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.7192000150680542},{"id":"https://openalex.org/C90509273","wikidata":"https://www.wikidata.org/wiki/Q11012","display_name":"Robot","level":2,"score":0.6740000247955322},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6711999773979187},{"id":"https://openalex.org/C13662910","wikidata":"https://www.wikidata.org/wiki/Q193139","display_name":"Trajectory","level":2,"score":0.4772000014781952},{"id":"https://openalex.org/C18762648","wikidata":"https://www.wikidata.org/wiki/Q42213","display_name":"Work (physics)","level":2,"score":0.46230000257492065},{"id":"https://openalex.org/C43521106","wikidata":"https://www.wikidata.org/wiki/Q2165493","display_name":"Pipeline (software)","level":2,"score":0.4271000027656555},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4259999990463257},{"id":"https://openalex.org/C176217482","wikidata":"https://www.wikidata.org/wiki/Q860554","display_name":"Metric (unit)","level":2,"score":0.3783000111579895},{"id":"https://openalex.org/C2780009758","wikidata":"https://www.wikidata.org/wiki/Q6804172","display_name":"Measure (data warehouse)","level":2,"score":0.3716999888420105},{"id":"https://openalex.org/C185429906","wikidata":"https://www.wikidata.org/wiki/Q1130160","display_name":"Estimator","level":2,"score":0.3582000136375427},{"id":"https://openalex.org/C2780992000","wikidata":"https://www.wikidata.org/wiki/Q17016113","display_name":"Generator (circuit theory)","level":3,"score":0.3472999930381775},{"id":"https://openalex.org/C133731056","wikidata":"https://www.wikidata.org/wiki/Q4917288","display_name":"Control engineering","level":1,"score":0.34150001406669617},{"id":"https://openalex.org/C115901376","wikidata":"https://www.wikidata.org/wiki/Q184199","display_name":"Automation","level":2,"score":0.33550000190734863},{"id":"https://openalex.org/C2780898871","wikidata":"https://www.wikidata.org/wiki/Q860554","display_name":"Performance metric","level":2,"score":0.32260000705718994},{"id":"https://openalex.org/C2776650193","wikidata":"https://www.wikidata.org/wiki/Q264661","display_name":"Obstacle","level":2,"score":0.30889999866485596},{"id":"https://openalex.org/C81074085","wikidata":"https://www.wikidata.org/wiki/Q366872","display_name":"Motion planning","level":3,"score":0.28760001063346863},{"id":"https://openalex.org/C100368936","wikidata":"https://www.wikidata.org/wiki/Q1411725","display_name":"Crawling","level":2,"score":0.28299999237060547},{"id":"https://openalex.org/C175154964","wikidata":"https://www.wikidata.org/wiki/Q380077","display_name":"Task analysis","level":3,"score":0.28220000863075256},{"id":"https://openalex.org/C48677424","wikidata":"https://www.wikidata.org/wiki/Q6888088","display_name":"Mode (computer interface)","level":2,"score":0.2712000012397766},{"id":"https://openalex.org/C34413123","wikidata":"https://www.wikidata.org/wiki/Q170978","display_name":"Robotics","level":3,"score":0.2703999876976013}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2603.17300","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.17300","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2603.17300","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.17300","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/1","score":0.47147136926651,"display_name":"No poverty"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Despite":[0],"strong":[1],"multi-task":[2],"pretraining,":[3],"existing":[4],"policies":[5],"often":[6],"exhibit":[7],"poor":[8],"task":[9,57,88,182],"steerability.":[10,76],"For":[11],"example,":[12],"a":[13,20,51,72,93,114,125,138],"robot":[14,61,202],"may":[15],"fail":[16],"to":[17,19,53,96,176,179],"respond":[18],"new":[21],"instruction":[22],"``put":[23],"the":[24,27,32,36,146],"bowl":[25],"in":[26,59],"sink\"":[28],"when":[29,45],"moving":[30],"towards":[31],"oven,":[33],"executing":[34],"``close":[35],"oven\",":[37],"even":[38],"though":[39],"it":[40],"can":[41],"complete":[42],"both":[43],"tasks":[44],"executed":[46],"separately.":[47],"We":[48,63,77,186],"propose":[49],"ReSteer,":[50],"framework":[52],"quantify":[54],"and":[55,91,136,196],"improve":[56],"steerability":[58,80,109,115,144,155,168,195],"multitask":[60],"policies.":[62,203],"conduct":[64],"an":[65],"exhaustive":[66],"evaluation":[67],"of":[68,75],"state-of-the-art":[69],"policies,":[70],"revealing":[71],"common":[73],"lack":[74],"find":[78],"that":[79,117,129,141,166],"is":[81,169],"associated":[82],"with":[83],"limited":[84],"overlap":[85,99],"among":[86],"training":[87],"trajectory":[89],"distributions,":[90],"introduce":[92],"proxy":[94],"metric":[95],"measure":[97],"this":[98,105,188],"from":[100,133],"policy":[101,143],"behavior.":[102],"Building":[103],"on":[104,151,193],"insight,":[106],"ReSteer":[107,153],"improves":[108,142,154],"via":[110],"three":[111],"components:":[112],"(i)":[113],"estimator":[116],"identifies":[118],"low-steerability":[119],"states":[120],"without":[121],"full-rollout":[122],"evaluation,":[123],"(ii)":[124],"steerable":[126],"data":[127,197],"generator":[128],"synthesizes":[130],"motion":[131],"segments":[132],"these":[134],"states,":[135],"(iii)":[137],"self-refinement":[139],"pipeline":[140],"using":[145],"generated":[147],"data.":[148],"In":[149,161],"simulation":[150],"LIBERO,":[152],"by":[156],"11\\%":[157],"over":[158],"18k":[159],"rollouts.":[160],"real-world":[162],"experiments,":[163],"we":[164],"show":[165],"improved":[167],"critical":[170],"for":[171,200],"interactive":[172],"use,":[173],"enabling":[174],"users":[175],"instruct":[177],"robots":[178],"perform":[180],"any":[181,184],"at":[183],"time.":[185],"hope":[187],"work":[189],"motivates":[190],"further":[191],"study":[192],"quantifying":[194],"collection":[198],"strategies":[199],"large":[201]},"counts_by_year":[],"updated_date":"2026-03-20T20:54:20.808490","created_date":"2026-03-20T00:00:00"}
