{"id":"https://openalex.org/W7128523183","doi":"https://doi.org/10.48550/arxiv.2602.07413","title":"Going with the Flow: Koopman Behavioral Models as Implicit Planners for Visuo-Motor Dexterity","display_name":"Going with the Flow: Koopman Behavioral Models as Implicit Planners for Visuo-Motor Dexterity","publication_year":2026,"publication_date":"2026-02-07","ids":{"openalex":"https://openalex.org/W7128523183","doi":"https://doi.org/10.48550/arxiv.2602.07413"},"language":null,"primary_location":{"id":"pmh:doi:10.48550/arxiv.2602.07413","is_oa":true,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S4406922384","display_name":"Open MIND","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"publisher-specific-oa","license_id":"https://openalex.org/licenses/publisher-specific-oa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":null,"any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5101500088","display_name":"Yunhai Han","orcid":"https://orcid.org/0000-0003-3119-0129"},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Han, Yunhai","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5079815370","display_name":"LinHao Bai","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Bai, Linhao","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5125586467","display_name":"Ziyu Xiao","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Xiao, Ziyu","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100314590","display_name":"Zhaodong Yang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yang, Zhaodong","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5125545063","display_name":"Yogita Choudhary","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Choudhary, Yogita","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5125574263","display_name":"Krishna Jha","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Jha, Krishna","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5124941914","display_name":"Chuizheng Kong","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Kong, Chuizheng","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5019247599","display_name":"Shreyas Kousik","orcid":"https://orcid.org/0000-0003-1348-7463"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Kousik, Shreyas","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5087370510","display_name":"Harish Ravichandar","orcid":"https://orcid.org/0000-0002-6635-2637"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Ravichandar, Harish","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":9,"corresponding_author_ids":["https://openalex.org/A5101500088"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10653","display_name":"Robot Manipulation and Learning","score":0.7788000106811523,"subfield":{"id":"https://openalex.org/subfields/2207","display_name":"Control and Systems Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10653","display_name":"Robot Manipulation and Learning","score":0.7788000106811523,"subfield":{"id":"https://openalex.org/subfields/2207","display_name":"Control and Systems Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.038100000470876694,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12290","display_name":"Human Motion and Animation","score":0.02280000038444996,"subfield":{"id":"https://openalex.org/subfields/2207","display_name":"Control and Systems Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/action","display_name":"Action (physics)","score":0.5228999853134155},{"id":"https://openalex.org/keywords/context","display_name":"Context (archaeology)","score":0.516700029373169},{"id":"https://openalex.org/keywords/heuristic","display_name":"Heuristic","score":0.484499990940094},{"id":"https://openalex.org/keywords/robot","display_name":"Robot","score":0.4814000129699707},{"id":"https://openalex.org/keywords/coherence","display_name":"Coherence (philosophical gambling strategy)","score":0.4797999858856201},{"id":"https://openalex.org/keywords/representation","display_name":"Representation (politics)","score":0.45210000872612},{"id":"https://openalex.org/keywords/operationalization","display_name":"Operationalization","score":0.43230000138282776},{"id":"https://openalex.org/keywords/a-priori-and-a-posteriori","display_name":"A priori and a posteriori","score":0.37770000100135803},{"id":"https://openalex.org/keywords/workspace","display_name":"Workspace","score":0.36039999127388},{"id":"https://openalex.org/keywords/perception","display_name":"Perception","score":0.35510000586509705}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6991999745368958},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6237999796867371},{"id":"https://openalex.org/C2780791683","wikidata":"https://www.wikidata.org/wiki/Q846785","display_name":"Action (physics)","level":2,"score":0.5228999853134155},{"id":"https://openalex.org/C2779343474","wikidata":"https://www.wikidata.org/wiki/Q3109175","display_name":"Context (archaeology)","level":2,"score":0.516700029373169},{"id":"https://openalex.org/C173801870","wikidata":"https://www.wikidata.org/wiki/Q201413","display_name":"Heuristic","level":2,"score":0.484499990940094},{"id":"https://openalex.org/C90509273","wikidata":"https://www.wikidata.org/wiki/Q11012","display_name":"Robot","level":2,"score":0.4814000129699707},{"id":"https://openalex.org/C2781181686","wikidata":"https://www.wikidata.org/wiki/Q4226068","display_name":"Coherence (philosophical gambling strategy)","level":2,"score":0.4797999858856201},{"id":"https://openalex.org/C107457646","wikidata":"https://www.wikidata.org/wiki/Q207434","display_name":"Human\u2013computer interaction","level":1,"score":0.4595000147819519},{"id":"https://openalex.org/C2776359362","wikidata":"https://www.wikidata.org/wiki/Q2145286","display_name":"Representation (politics)","level":3,"score":0.45210000872612},{"id":"https://openalex.org/C9354725","wikidata":"https://www.wikidata.org/wiki/Q286017","display_name":"Operationalization","level":2,"score":0.43230000138282776},{"id":"https://openalex.org/C75553542","wikidata":"https://www.wikidata.org/wiki/Q178161","display_name":"A priori and a posteriori","level":2,"score":0.37770000100135803},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.3668000102043152},{"id":"https://openalex.org/C58581272","wikidata":"https://www.wikidata.org/wiki/Q12741163","display_name":"Workspace","level":3,"score":0.36039999127388},{"id":"https://openalex.org/C26760741","wikidata":"https://www.wikidata.org/wiki/Q160402","display_name":"Perception","level":2,"score":0.35510000586509705},{"id":"https://openalex.org/C111370547","wikidata":"https://www.wikidata.org/wiki/Q7451120","display_name":"Sensory cue","level":2,"score":0.34929999709129333},{"id":"https://openalex.org/C206345919","wikidata":"https://www.wikidata.org/wiki/Q20380951","display_name":"Resource (disambiguation)","level":2,"score":0.3467000126838684},{"id":"https://openalex.org/C36464697","wikidata":"https://www.wikidata.org/wiki/Q451553","display_name":"Visualization","level":2,"score":0.34360000491142273},{"id":"https://openalex.org/C17020691","wikidata":"https://www.wikidata.org/wiki/Q139677","display_name":"Operator (biology)","level":5,"score":0.3395000100135803},{"id":"https://openalex.org/C183322885","wikidata":"https://www.wikidata.org/wiki/Q17007702","display_name":"Context model","level":3,"score":0.33719998598098755},{"id":"https://openalex.org/C49937458","wikidata":"https://www.wikidata.org/wiki/Q2599292","display_name":"Probabilistic logic","level":2,"score":0.31929999589920044},{"id":"https://openalex.org/C34413123","wikidata":"https://www.wikidata.org/wiki/Q170978","display_name":"Robotics","level":3,"score":0.3176000118255615},{"id":"https://openalex.org/C165064840","wikidata":"https://www.wikidata.org/wiki/Q1321061","display_name":"Matching (statistics)","level":2,"score":0.3172999918460846},{"id":"https://openalex.org/C166109690","wikidata":"https://www.wikidata.org/wiki/Q4677422","display_name":"Action selection","level":3,"score":0.2928999960422516},{"id":"https://openalex.org/C152086174","wikidata":"https://www.wikidata.org/wiki/Q3030571","display_name":"Haptic technology","level":2,"score":0.28610000014305115},{"id":"https://openalex.org/C2777379011","wikidata":"https://www.wikidata.org/wiki/Q938545","display_name":"Implicit learning","level":3,"score":0.28380000591278076},{"id":"https://openalex.org/C81363708","wikidata":"https://www.wikidata.org/wiki/Q17084460","display_name":"Convolutional neural network","level":2,"score":0.2712000012397766},{"id":"https://openalex.org/C178253425","wikidata":"https://www.wikidata.org/wiki/Q162668","display_name":"Visual perception","level":3,"score":0.267300009727478},{"id":"https://openalex.org/C151319957","wikidata":"https://www.wikidata.org/wiki/Q752739","display_name":"Asynchronous communication","level":2,"score":0.266400009393692},{"id":"https://openalex.org/C175154964","wikidata":"https://www.wikidata.org/wiki/Q380077","display_name":"Task analysis","level":3,"score":0.2623000144958496},{"id":"https://openalex.org/C145460709","wikidata":"https://www.wikidata.org/wiki/Q859951","display_name":"Human\u2013robot interaction","level":3,"score":0.25870001316070557},{"id":"https://openalex.org/C66024118","wikidata":"https://www.wikidata.org/wiki/Q1122506","display_name":"Computational model","level":2,"score":0.25850000977516174},{"id":"https://openalex.org/C2776036281","wikidata":"https://www.wikidata.org/wiki/Q48769818","display_name":"Constraint (computer-aided design)","level":2,"score":0.2563999891281128},{"id":"https://openalex.org/C79379906","wikidata":"https://www.wikidata.org/wiki/Q3174497","display_name":"Dynamical systems theory","level":2,"score":0.2515000104904175}],"mesh":[],"locations_count":2,"locations":[{"id":"pmh:doi:10.48550/arxiv.2602.07413","is_oa":true,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S4406922384","display_name":"Open MIND","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"publisher-specific-oa","license_id":"https://openalex.org/licenses/publisher-specific-oa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Article"},{"id":"doi:10.48550/arxiv.2602.07413","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2602.07413","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"pmh:doi:10.48550/arxiv.2602.07413","is_oa":true,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S4406922384","display_name":"Open MIND","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"publisher-specific-oa","license_id":"https://openalex.org/licenses/publisher-specific-oa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"There":[0],"has":[1],"been":[2],"rapid":[3],"and":[4,25,35,38,58,74,107,164,232,240,265],"dramatic":[5],"progress":[6],"in":[7,16,156,216],"learning":[8],"complex":[9],"visuo-motor":[10],"manipulation":[11],"skills":[12,54,92],"from":[13,41],"demonstrations,":[14],"thanks":[15],"part":[17],"to":[18,64,89,150,263],"expressive":[19],"policy":[20],"classes":[21],"that":[22,87,97,145,176,226,246],"employ":[23],"diffusion-":[24],"transformer-based":[26],"backbones.":[27],"However,":[28],"these":[29,135],"design":[30],"choices":[31],"require":[32],"significant":[33],"data":[34],"computational":[36],"resources":[37],"remain":[39],"far":[40],"reliable,":[42],"particularly":[43],"within":[44],"the":[45,103,111,158,191,196,203,218,251],"context":[46],"of":[47,102,110,143,161,199,253],"multi-fingered":[48],"dexterous":[49,91],"manipulation.":[50],"Fundamentally,":[51],"they":[52],"model":[53,219],"as":[55,93,181,221],"reactive":[56],"mappings":[57],"rely":[59],"on":[60],"fixed-horizon":[61],"action":[62],"chunking":[63],"mitigate":[65],"jitter,":[66],"creating":[67],"a":[68,85,140,153,170],"rigid":[69],"trade-off":[70],"between":[71],"temporal":[72,124],"coherence":[73,125],"reactivity.":[75],"In":[76],"this":[77],"work,":[78],"we":[79,137,210,244],"introduce":[80,211],"Unified":[81],"Behavioral":[82],"Models":[83],"(UBMs),":[84],"framework":[86],"learns":[88],"represent":[90],"coupled":[94],"dynamical":[95],"systems":[96],"capture":[98],"how":[99],"visual":[100,163,200,234],"features":[101,166,201],"environment":[104],"(visual":[105],"flow)":[106,114],"proprioceptive":[108,165],"states":[109],"robot":[112,193],"(action":[113],"co-evolve.":[115],"By":[116],"capturing":[117],"such":[118],"behavioral":[119],"dynamics,":[120],"UBMs":[121,144],"can":[122,178],"ensure":[123],"by":[126,130,169],"construction":[127],"rather":[128],"than":[129],"heuristic":[131],"averaging.":[132],"To":[133,207],"operationalize":[134],"models,":[136],"propose":[138],"Koopman-UBM,":[139],"first":[141],"instantiation":[142],"leverages":[146],"Koopman":[147],"Operator":[148],"theory":[149],"effectively":[151],"learn":[152],"unified":[154],"representation":[155],"which":[157,217],"joint":[159],"flow":[160,198,235],"latent":[162],"is":[167],"governed":[168],"structured":[171],"linear":[172],"system.":[173],"We":[174],"demonstrate":[175,245],"Koopman-UBM":[177],"be":[179],"viewed":[180],"an":[182,186,212],"implicit":[183],"planner:":[184],"given":[185],"initial":[187],"condition,":[188],"it":[189],"computes":[190],"desired":[192],"behavior":[194],"with":[195],"resulting":[197],"over":[202],"entire":[204],"skill":[205],"horizon.":[206],"enable":[208],"reactivity,":[209],"online":[213],"replanning":[214,229],"strategy":[215],"acts":[220],"its":[222],"own":[223],"runtime":[224],"monitor":[225],"automatically":[227],"triggers":[228],"when":[230],"predicted":[231],"observed":[233],"diverge.":[236],"Across":[237],"seven":[238],"simulated":[239],"two":[241],"real-world":[242],"tasks,":[243],"K-UBM":[247],"matches":[248],"or":[249],"exceeds":[250],"performance":[252],"SOTA":[254],"baselines,":[255],"while":[256],"offering":[257],"faster":[258],"inference,":[259],"smooth":[260],"execution,":[261],"robustness":[262],"occlusions,":[264],"flexible":[266],"replanning.":[267]},"counts_by_year":[],"updated_date":"2026-04-04T16:13:02.066488","created_date":"2026-02-11T00:00:00"}
