{"id":"https://openalex.org/W7134906271","doi":"https://doi.org/10.48550/arxiv.2603.09121","title":"DexHiL: A Human-in-the-Loop Framework for Vision-Language-Action Model Post-Training in Dexterous Manipulation","display_name":"DexHiL: A Human-in-the-Loop Framework for Vision-Language-Action Model Post-Training in Dexterous Manipulation","publication_year":2026,"publication_date":"2026-03-10","ids":{"openalex":"https://openalex.org/W7134906271","doi":"https://doi.org/10.48550/arxiv.2603.09121"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2603.09121","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.09121","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2603.09121","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5128706105","display_name":"Yifan Han","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Han, Yifan","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5128794012","display_name":"Zhongxi Chen","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Chen, Zhongxi","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5128766947","display_name":"Yuxuan Zhao","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhao, Yuxuan","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5029589907","display_name":"Congsheng Xu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Xu, Congsheng","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5112008552","display_name":"Yanming Shao","orcid":"https://orcid.org/0009-0008-4196-1138"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Shao, Yanming","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5128741989","display_name":"Yichuan Peng","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Peng, Yichuan","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5128720410","display_name":"Yao Mu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Mu, Yao","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5128764723","display_name":"Wenzhao Lian","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Lian, Wenzhao","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":8,"corresponding_author_ids":["https://openalex.org/A5128706105"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10653","display_name":"Robot Manipulation and Learning","score":0.7293999791145325,"subfield":{"id":"https://openalex.org/subfields/2207","display_name":"Control and Systems Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10653","display_name":"Robot Manipulation and Learning","score":0.7293999791145325,"subfield":{"id":"https://openalex.org/subfields/2207","display_name":"Control and Systems Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.0997999981045723,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10709","display_name":"Social Robot Interaction and HRI","score":0.02160000056028366,"subfield":{"id":"https://openalex.org/subfields/3207","display_name":"Social Psychology"},"field":{"id":"https://openalex.org/fields/32","display_name":"Psychology"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/teleoperation","display_name":"Teleoperation","score":0.8162000179290771},{"id":"https://openalex.org/keywords/reliability","display_name":"Reliability (semiconductor)","score":0.5322999954223633},{"id":"https://openalex.org/keywords/generalization","display_name":"Generalization","score":0.5266000032424927},{"id":"https://openalex.org/keywords/interface","display_name":"Interface (matter)","score":0.49309998750686646},{"id":"https://openalex.org/keywords/robot","display_name":"Robot","score":0.4505999982357025},{"id":"https://openalex.org/keywords/robotic-arm","display_name":"Robotic arm","score":0.4214000105857849},{"id":"https://openalex.org/keywords/mechanism","display_name":"Mechanism (biology)","score":0.39910000562667847},{"id":"https://openalex.org/keywords/control","display_name":"Control (management)","score":0.39489999413490295}],"concepts":[{"id":"https://openalex.org/C161759796","wikidata":"https://www.wikidata.org/wiki/Q3982902","display_name":"Teleoperation","level":3,"score":0.8162000179290771},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6126000285148621},{"id":"https://openalex.org/C43214815","wikidata":"https://www.wikidata.org/wiki/Q7310987","display_name":"Reliability (semiconductor)","level":3,"score":0.5322999954223633},{"id":"https://openalex.org/C177148314","wikidata":"https://www.wikidata.org/wiki/Q170084","display_name":"Generalization","level":2,"score":0.5266000032424927},{"id":"https://openalex.org/C113843644","wikidata":"https://www.wikidata.org/wiki/Q901882","display_name":"Interface (matter)","level":4,"score":0.49309998750686646},{"id":"https://openalex.org/C90509273","wikidata":"https://www.wikidata.org/wiki/Q11012","display_name":"Robot","level":2,"score":0.4505999982357025},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4415000081062317},{"id":"https://openalex.org/C150415221","wikidata":"https://www.wikidata.org/wiki/Q40687","display_name":"Robotic arm","level":2,"score":0.4214000105857849},{"id":"https://openalex.org/C89611455","wikidata":"https://www.wikidata.org/wiki/Q6804646","display_name":"Mechanism (biology)","level":2,"score":0.39910000562667847},{"id":"https://openalex.org/C2775924081","wikidata":"https://www.wikidata.org/wiki/Q55608371","display_name":"Control (management)","level":2,"score":0.39489999413490295},{"id":"https://openalex.org/C77618280","wikidata":"https://www.wikidata.org/wiki/Q1155772","display_name":"Scheme (mathematics)","level":2,"score":0.36809998750686646},{"id":"https://openalex.org/C133731056","wikidata":"https://www.wikidata.org/wiki/Q4917288","display_name":"Control engineering","level":1,"score":0.34439998865127563},{"id":"https://openalex.org/C196467688","wikidata":"https://www.wikidata.org/wiki/Q1851985","display_name":"Telerobotics","level":4,"score":0.337799996137619},{"id":"https://openalex.org/C2775960376","wikidata":"https://www.wikidata.org/wiki/Q1435859","display_name":"Grippers","level":2,"score":0.3312000036239624},{"id":"https://openalex.org/C140779682","wikidata":"https://www.wikidata.org/wiki/Q210868","display_name":"Sampling (signal processing)","level":3,"score":0.3269999921321869},{"id":"https://openalex.org/C112972136","wikidata":"https://www.wikidata.org/wiki/Q7595718","display_name":"Stability (learning theory)","level":2,"score":0.32330000400543213},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.31940001249313354},{"id":"https://openalex.org/C34413123","wikidata":"https://www.wikidata.org/wiki/Q170978","display_name":"Robotics","level":3,"score":0.3188000023365021},{"id":"https://openalex.org/C107457646","wikidata":"https://www.wikidata.org/wiki/Q207434","display_name":"Human\u2013computer interaction","level":1,"score":0.3149999976158142},{"id":"https://openalex.org/C92991967","wikidata":"https://www.wikidata.org/wiki/Q7644329","display_name":"Supervisory control","level":3,"score":0.2985999882221222},{"id":"https://openalex.org/C44154836","wikidata":"https://www.wikidata.org/wiki/Q45045","display_name":"Simulation","level":1,"score":0.2976999878883362},{"id":"https://openalex.org/C2776544517","wikidata":"https://www.wikidata.org/wiki/Q189447","display_name":"Unexpected events","level":2,"score":0.2904999852180481},{"id":"https://openalex.org/C17500928","wikidata":"https://www.wikidata.org/wiki/Q959968","display_name":"Control system","level":2,"score":0.27950000762939453},{"id":"https://openalex.org/C115901376","wikidata":"https://www.wikidata.org/wiki/Q184199","display_name":"Automation","level":2,"score":0.27390000224113464},{"id":"https://openalex.org/C45235069","wikidata":"https://www.wikidata.org/wiki/Q278425","display_name":"Table (database)","level":2,"score":0.2696000039577484},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.25780001282691956},{"id":"https://openalex.org/C2780440489","wikidata":"https://www.wikidata.org/wiki/Q5227278","display_name":"Data-driven","level":2,"score":0.25049999356269836}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2603.09121","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.09121","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2603.09121","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.09121","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"While":[0],"Vision-Language-Action":[1],"(VLA)":[2],"models":[3],"have":[4],"demonstrated":[5],"promising":[6],"generalization":[7],"capabilities":[8],"in":[9,71,153],"robotic":[10],"manipulation,":[11],"deploying":[12],"them":[13],"on":[14],"specific":[15],"and":[16,54,73,94],"complex":[17],"downstream":[18],"tasks":[19],"still":[20],"demands":[21],"effective":[22,135],"post-training.":[23],"In":[24],"parallel,":[25],"Human-in-the-Loop":[26],"(HiL)":[27],"learning":[28],"has":[29],"proven":[30],"to":[31,44],"be":[32],"a":[33,99,116,139],"powerful":[34],"mechanism":[35],"for":[36,84,113],"refining":[37],"robot":[38],"policies.":[39],"However,":[40],"extending":[41],"this":[42],"paradigm":[43],"dexterous":[45,67,85,96],"manipulation":[46],"remains":[47],"challenging:":[48],"multi-finger":[49],"control":[50],"is":[51],"high-dimensional,":[52],"contact-intensive,":[53],"exhibits":[55],"execution":[56],"distributions":[57],"that":[58,109,120,130],"differ":[59],"markedly":[60],"from":[61],"standard":[62,144],"arm":[63,93],"motions,":[64],"leaving":[65],"existing":[66],"VLA":[68,86],"systems":[69],"limited":[70],"reliability":[72],"adaptability.":[74],"We":[75],"present":[76],"DexHiL,":[77],"the":[78,92,95],"first":[79],"integrated":[80],"arm-hand":[81],"human-in-the-loop":[82],"framework":[83],"models,":[87],"enabling":[88],"coordinated":[89],"interventions":[90],"over":[91],"hand":[97],"within":[98],"single":[100],"system.":[101],"DexHiL":[102,131],"introduces":[103],"an":[104,134,149],"intervention-aware":[105],"data":[106],"sampling":[107],"strategy":[108],"prioritizes":[110],"corrective":[111],"segments":[112],"post-training,":[114],"alongside":[115],"lightweight":[117],"teleoperation":[118],"interface":[119],"supports":[121],"instantaneous":[122],"human":[123],"corrections":[124],"during":[125],"execution.":[126],"Real-robot":[127],"experiments":[128],"demonstrate":[129],"serves":[132],"as":[133],"post-training":[136],"framework,":[137],"yielding":[138],"substantial":[140],"performance":[141],"leap,":[142],"outperforming":[143],"offline-only":[145],"fine-tuning":[146],"baselines":[147],"by":[148],"average":[150],"of":[151],"25%":[152],"success":[154],"rates":[155],"across":[156],"distinct":[157],"tasks.":[158],"Project":[159],"page:":[160],"https://chenzhongxi-sjtu.github.io/dexhil/":[161]},"counts_by_year":[],"updated_date":"2026-03-12T06:18:43.230356","created_date":"2026-03-12T00:00:00"}
