{"id":"https://openalex.org/W7138108985","doi":"https://doi.org/10.48550/arxiv.2603.15620","title":"Towards Generalizable Robotic Manipulation in Dynamic Environments","display_name":"Towards Generalizable Robotic Manipulation in Dynamic Environments","publication_year":2026,"publication_date":"2026-03-16","ids":{"openalex":"https://openalex.org/W7138108985","doi":"https://doi.org/10.48550/arxiv.2603.15620"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2603.15620","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.15620","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2603.15620","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5129646691","display_name":"Heng Fang","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Fang, Heng","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5129646275","display_name":"Shangru Li","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Li, Shangru","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5020637336","display_name":"Shuhan Wang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wang, Shuhan","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5103721879","display_name":"Xuanyang Xi","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Xi, Xuanyang","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5129699975","display_name":"Dingkang Liang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Liang, Dingkang","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5129648730","display_name":"Xiang Bai","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Bai, Xiang","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5129646691"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10653","display_name":"Robot Manipulation and Learning","score":0.6453999876976013,"subfield":{"id":"https://openalex.org/subfields/2207","display_name":"Control and Systems Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10653","display_name":"Robot Manipulation and Learning","score":0.6453999876976013,"subfield":{"id":"https://openalex.org/subfields/2207","display_name":"Control and Systems Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.2054000049829483,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10709","display_name":"Social Robot Interaction and HRI","score":0.0284000001847744,"subfield":{"id":"https://openalex.org/subfields/3207","display_name":"Social Psychology"},"field":{"id":"https://openalex.org/fields/32","display_name":"Psychology"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/generalizability-theory","display_name":"Generalizability theory","score":0.6819999814033508},{"id":"https://openalex.org/keywords/benchmark","display_name":"Benchmark (surveying)","score":0.6480000019073486},{"id":"https://openalex.org/keywords/dynamic-data","display_name":"Dynamic data","score":0.5795000195503235},{"id":"https://openalex.org/keywords/perception","display_name":"Perception","score":0.4672999978065491},{"id":"https://openalex.org/keywords/scarcity","display_name":"Scarcity","score":0.4253999888896942},{"id":"https://openalex.org/keywords/resource","display_name":"Resource (disambiguation)","score":0.36629998683929443},{"id":"https://openalex.org/keywords/code","display_name":"Code (set theory)","score":0.3434999883174896},{"id":"https://openalex.org/keywords/representation","display_name":"Representation (politics)","score":0.3375000059604645}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7178999781608582},{"id":"https://openalex.org/C27158222","wikidata":"https://www.wikidata.org/wiki/Q5532422","display_name":"Generalizability theory","level":2,"score":0.6819999814033508},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.6480000019073486},{"id":"https://openalex.org/C197298091","wikidata":"https://www.wikidata.org/wiki/Q5318963","display_name":"Dynamic data","level":2,"score":0.5795000195503235},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.49950000643730164},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.48980000615119934},{"id":"https://openalex.org/C26760741","wikidata":"https://www.wikidata.org/wiki/Q160402","display_name":"Perception","level":2,"score":0.4672999978065491},{"id":"https://openalex.org/C109747225","wikidata":"https://www.wikidata.org/wiki/Q815758","display_name":"Scarcity","level":2,"score":0.4253999888896942},{"id":"https://openalex.org/C206345919","wikidata":"https://www.wikidata.org/wiki/Q20380951","display_name":"Resource (disambiguation)","level":2,"score":0.36629998683929443},{"id":"https://openalex.org/C2776760102","wikidata":"https://www.wikidata.org/wiki/Q5139990","display_name":"Code (set theory)","level":3,"score":0.3434999883174896},{"id":"https://openalex.org/C2776359362","wikidata":"https://www.wikidata.org/wiki/Q2145286","display_name":"Representation (politics)","level":3,"score":0.3375000059604645},{"id":"https://openalex.org/C176217482","wikidata":"https://www.wikidata.org/wiki/Q860554","display_name":"Metric (unit)","level":2,"score":0.32850000262260437},{"id":"https://openalex.org/C2780504989","wikidata":"https://www.wikidata.org/wiki/Q2742037","display_name":"Dynamic capabilities","level":2,"score":0.29440000653266907},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.2888000011444092},{"id":"https://openalex.org/C51632099","wikidata":"https://www.wikidata.org/wiki/Q3985153","display_name":"Training set","level":2,"score":0.28839999437332153},{"id":"https://openalex.org/C9652623","wikidata":"https://www.wikidata.org/wiki/Q190109","display_name":"Field (mathematics)","level":2,"score":0.2874000072479248},{"id":"https://openalex.org/C90509273","wikidata":"https://www.wikidata.org/wiki/Q11012","display_name":"Robot","level":2,"score":0.28600001335144043},{"id":"https://openalex.org/C13540734","wikidata":"https://www.wikidata.org/wiki/Q5318996","display_name":"Dynamic network analysis","level":2,"score":0.28220000863075256},{"id":"https://openalex.org/C3020493868","wikidata":"https://www.wikidata.org/wiki/Q55631277","display_name":"Real world data","level":2,"score":0.2773999869823456},{"id":"https://openalex.org/C177606310","wikidata":"https://www.wikidata.org/wiki/Q5674297","display_name":"Adaptability","level":2,"score":0.27379998564720154},{"id":"https://openalex.org/C2777211547","wikidata":"https://www.wikidata.org/wiki/Q17141490","display_name":"Training (meteorology)","level":2,"score":0.2700999975204468},{"id":"https://openalex.org/C107457646","wikidata":"https://www.wikidata.org/wiki/Q207434","display_name":"Human\u2013computer interaction","level":1,"score":0.26269999146461487},{"id":"https://openalex.org/C118702147","wikidata":"https://www.wikidata.org/wiki/Q189396","display_name":"Dynamic random-access memory","level":3,"score":0.2526000142097473}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2603.15620","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.15620","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2603.15620","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.15620","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Vision-Language-Action":[0],"(VLA)":[1],"models":[2],"excel":[3],"in":[4,9,139],"static":[5,159],"manipulation":[6,25],"but":[7],"struggle":[8],"dynamic":[10,24,54,80,87,94,150],"environments":[11],"with":[12,59,124],"moving":[13],"targets.":[14],"This":[15],"performance":[16],"gap":[17],"primarily":[18],"stems":[19],"from":[20],"a":[21,47,67,100,135],"scarcity":[22],"of":[23,30,93],"datasets":[26],"and":[27,50,66,89,110,163],"the":[28,91],"reliance":[29],"mainstream":[31],"VLAs":[32,78],"on":[33,79,149],"single-frame":[34],"observations,":[35],"restricting":[36],"their":[37],"spatiotemporal":[38,154],"reasoning":[39],"capabilities.":[40],"To":[41],"address":[42],"this,":[43],"we":[44,74,97,145],"introduce":[45],"DOMINO,":[46],"large-scale":[48],"dataset":[49],"benchmark":[51],"for":[52,86],"generalizable":[53],"manipulation,":[55],"featuring":[56],"35":[57],"tasks":[58],"hierarchical":[60],"complexities,":[61],"over":[62,142],"110K":[63],"expert":[64],"trajectories,":[65],"multi-dimensional":[68],"evaluation":[69],"suite.":[70],"Through":[71],"comprehensive":[72],"experiments,":[73],"systematically":[75],"evaluate":[76],"existing":[77],"tasks,":[81],"explore":[82],"effective":[83],"training":[84,148],"strategies":[85],"awareness,":[88],"validate":[90],"generalizability":[92],"data.":[95],"Furthermore,":[96],"propose":[98],"PUMA,":[99],"dynamics-aware":[101],"VLA":[102],"architecture.":[103],"By":[104],"integrating":[105],"scene-centric":[106],"historical":[107],"optical":[108],"flow":[109],"specialized":[111],"world":[112],"queries":[113],"to":[114,158],"implicitly":[115],"forecast":[116],"object-centric":[117],"future":[118],"states,":[119],"PUMA":[120,130],"couples":[121],"history-aware":[122],"perception":[123],"short-horizon":[125],"prediction.":[126],"Results":[127],"demonstrate":[128],"that":[129,147,156],"achieves":[131],"state-of-the-art":[132],"performance,":[133],"yielding":[134],"6.3%":[136],"absolute":[137],"improvement":[138],"success":[140],"rate":[141],"baselines.":[143],"Moreover,":[144],"show":[146],"data":[151,164],"fosters":[152],"robust":[153],"representations":[155],"transfer":[157],"tasks.":[160],"All":[161],"code":[162],"are":[165],"available":[166],"at":[167],"https://github.com/H-EmbodVis/DOMINO.":[168]},"counts_by_year":[],"updated_date":"2026-03-18T06:31:55.123368","created_date":"2026-03-18T00:00:00"}
