{"id":"https://openalex.org/W7139961659","doi":"https://doi.org/10.48550/arxiv.2603.19201","title":"OmniVTA: Visuo-Tactile World Modeling for Contact-Rich Robotic Manipulation","display_name":"OmniVTA: Visuo-Tactile World Modeling for Contact-Rich Robotic Manipulation","publication_year":2026,"publication_date":"2026-03-19","ids":{"openalex":"https://openalex.org/W7139961659","doi":"https://doi.org/10.48550/arxiv.2603.19201"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2603.19201","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.19201","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2603.19201","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5130229326","display_name":"Yuhang Zheng","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Zheng, Yuhang","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5130220388","display_name":"Songen Gu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Gu, Songen","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5130221772","display_name":"Weize Li","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Li, Weize","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5130230459","display_name":"Yupeng Zheng","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zheng, Yupeng","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5130251612","display_name":"Yujie Zang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zang, Yujie","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5130235115","display_name":"Shuai Tian","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Tian, Shuai","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5130227510","display_name":"Xiang Li","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Li, Xiang","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5130250853","display_name":"Ruihai Wu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Hao, Ce","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5130235598","display_name":"Ce Hao","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Gao, Chen","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5130244090","display_name":"Chen Gao","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Liu, Si","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5130232882","display_name":"Si Liu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Li, Haoran","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5130239615","display_name":"Mr Haoran Li","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Chen, Yilun","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5130244760","display_name":"Yilun Chen","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yan, Shuicheng","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5127798520","display_name":"Shuicheng Yan","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Ding, Wenchao","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":14,"corresponding_author_ids":["https://openalex.org/A5130229326"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10653","display_name":"Robot Manipulation and Learning","score":0.47279998660087585,"subfield":{"id":"https://openalex.org/subfields/2207","display_name":"Control and Systems Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10653","display_name":"Robot Manipulation and Learning","score":0.47279998660087585,"subfield":{"id":"https://openalex.org/subfields/2207","display_name":"Control and Systems Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10914","display_name":"Tactile and Sensory Interactions","score":0.20489999651908875,"subfield":{"id":"https://openalex.org/subfields/2805","display_name":"Cognitive Neuroscience"},"field":{"id":"https://openalex.org/fields/28","display_name":"Neuroscience"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},{"id":"https://openalex.org/T10338","display_name":"Advanced Sensor and Energy Harvesting Materials","score":0.15449999272823334,"subfield":{"id":"https://openalex.org/subfields/2204","display_name":"Biomedical Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.5777999758720398},{"id":"https://openalex.org/keywords/controller","display_name":"Controller (irrigation)","score":0.46779999136924744},{"id":"https://openalex.org/keywords/action","display_name":"Action (physics)","score":0.45969998836517334},{"id":"https://openalex.org/keywords/representation","display_name":"Representation (politics)","score":0.4307999908924103},{"id":"https://openalex.org/keywords/robot","display_name":"Robot","score":0.4189000129699707},{"id":"https://openalex.org/keywords/perception","display_name":"Perception","score":0.41449999809265137},{"id":"https://openalex.org/keywords/state","display_name":"State (computer science)","score":0.3684999942779541},{"id":"https://openalex.org/keywords/code","display_name":"Code (set theory)","score":0.359499990940094}],"concepts":[{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.5777999758720398},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.5759999752044678},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5623999834060669},{"id":"https://openalex.org/C203479927","wikidata":"https://www.wikidata.org/wiki/Q5165939","display_name":"Controller (irrigation)","level":2,"score":0.46779999136924744},{"id":"https://openalex.org/C2780791683","wikidata":"https://www.wikidata.org/wiki/Q846785","display_name":"Action (physics)","level":2,"score":0.45969998836517334},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.4311000108718872},{"id":"https://openalex.org/C2776359362","wikidata":"https://www.wikidata.org/wiki/Q2145286","display_name":"Representation (politics)","level":3,"score":0.4307999908924103},{"id":"https://openalex.org/C90509273","wikidata":"https://www.wikidata.org/wiki/Q11012","display_name":"Robot","level":2,"score":0.4189000129699707},{"id":"https://openalex.org/C26760741","wikidata":"https://www.wikidata.org/wiki/Q160402","display_name":"Perception","level":2,"score":0.41449999809265137},{"id":"https://openalex.org/C48103436","wikidata":"https://www.wikidata.org/wiki/Q599031","display_name":"State (computer science)","level":2,"score":0.3684999942779541},{"id":"https://openalex.org/C2776760102","wikidata":"https://www.wikidata.org/wiki/Q5139990","display_name":"Code (set theory)","level":3,"score":0.359499990940094},{"id":"https://openalex.org/C2776401178","wikidata":"https://www.wikidata.org/wiki/Q12050496","display_name":"Feature (linguistics)","level":2,"score":0.3578000068664551},{"id":"https://openalex.org/C81302111","wikidata":"https://www.wikidata.org/wiki/Q2916417","display_name":"Contact force","level":2,"score":0.3472999930381775},{"id":"https://openalex.org/C2775924081","wikidata":"https://www.wikidata.org/wiki/Q55608371","display_name":"Control (management)","level":2,"score":0.33230000734329224},{"id":"https://openalex.org/C2778755073","wikidata":"https://www.wikidata.org/wiki/Q10858537","display_name":"Scale (ratio)","level":2,"score":0.33070001006126404},{"id":"https://openalex.org/C107457646","wikidata":"https://www.wikidata.org/wiki/Q207434","display_name":"Human\u2013computer interaction","level":1,"score":0.3287000060081482},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.2953999936580658},{"id":"https://openalex.org/C133731056","wikidata":"https://www.wikidata.org/wiki/Q4917288","display_name":"Control engineering","level":1,"score":0.29280000925064087},{"id":"https://openalex.org/C3017819093","wikidata":"https://www.wikidata.org/wiki/Q328835","display_name":"Tactile perception","level":3,"score":0.2831999957561493},{"id":"https://openalex.org/C145912823","wikidata":"https://www.wikidata.org/wiki/Q113558","display_name":"Dynamics (music)","level":2,"score":0.2800999879837036},{"id":"https://openalex.org/C2776291640","wikidata":"https://www.wikidata.org/wiki/Q2912517","display_name":"Value (mathematics)","level":2,"score":0.27649998664855957},{"id":"https://openalex.org/C152086174","wikidata":"https://www.wikidata.org/wiki/Q3030571","display_name":"Haptic technology","level":2,"score":0.27410000562667847},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.26930001378059387},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.2590999901294708},{"id":"https://openalex.org/C148043351","wikidata":"https://www.wikidata.org/wiki/Q4456944","display_name":"Current (fluid)","level":2,"score":0.25619998574256897}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2603.19201","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.19201","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2603.19201","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.19201","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Contact-rich":[0],"manipulation":[1,108],"tasks,":[2],"such":[3],"as":[4,57],"wiping":[5],"and":[6,16,46,51,89,137,147,168,174,195],"assembly,":[7],"require":[8],"accurate":[9],"perception":[10],"of":[11,180],"contact":[12,66,128,183],"forces,":[13],"friction":[14],"changes,":[15],"state":[17],"transitions":[18],"that":[19,110,142,163],"cannot":[20],"be":[21,198],"reliably":[22],"inferred":[23],"from":[24],"vision":[25],"alone.":[26],"Despite":[27],"growing":[28],"interest":[29],"in":[30,44,48,151],"visuo-tactile":[31,107,122],"manipulation,":[32],"progress":[33],"is":[34],"constrained":[35],"by":[36],"two":[37],"persistent":[38],"limitations:":[39],"existing":[40,166],"datasets":[41],"are":[42],"small":[43],"scale":[45],"narrow":[47],"task":[49],"coverage,":[50],"current":[52],"methods":[53,167],"treat":[54],"tactile":[55,118,149,187],"signals":[56,150],"passive":[58],"observations":[59],"rather":[60],"than":[61],"using":[62],"them":[63],"to":[64,171],"model":[65,124],"dynamics":[67],"or":[68],"enable":[69],"closed-loop":[70],"control":[71],"explicitly.":[72],"In":[73],"this":[74,100],"paper,":[75],"we":[76,102],"present":[77],"\\textbf{OmniViTac},":[78],"a":[79,105,116,120,130,138,152],"large-scale":[80],"visuo-tactile-action":[81],"dataset":[82],"comprising":[83],"$21{,}000+$":[84],"trajectories":[85],"across":[86,157],"$86$":[87],"tasks":[88],"$100+$":[90],"objects,":[91],"organized":[92],"into":[93],"six":[94,159],"physics-grounded":[95],"interaction":[96,160],"patterns.":[97],"Building":[98],"on":[99,202],"dataset,":[101],"propose":[103],"\\textbf{OmniVTA},":[104],"world-model-based":[106],"framework":[109],"integrates":[111],"four":[112],"tightly":[113],"coupled":[114],"modules:":[115],"self-supervised":[117],"encoder,":[119],"two-stream":[121],"world":[123],"for":[125,134,189],"predicting":[126],"short-horizon":[127],"evolution,":[129],"contact-aware":[131],"fusion":[132],"policy":[133],"action":[135],"generation,":[136],"60Hz":[139],"reflexive":[140],"controller":[141],"corrects":[143],"deviations":[144],"between":[145],"predicted":[146],"observed":[148],"closed":[153],"loop.":[154],"Real-robot":[155],"experiments":[156],"all":[158],"categories":[161],"show":[162],"OmniVTA":[164],"outperforms":[165],"generalizes":[169],"well":[170],"unseen":[172],"objects":[173],"geometric":[175],"configurations,":[176],"confirming":[177],"the":[178,203],"value":[179],"combining":[181],"predictive":[182],"modeling":[184],"with":[185],"high-frequency":[186],"feedback":[188],"contact-rich":[190],"manipulation.":[191],"All":[192],"data,":[193],"models,":[194],"code":[196],"will":[197],"made":[199],"publicly":[200],"available":[201],"project":[204],"website":[205],"at":[206],"https://mrsecant.github.io/OmniVTA.":[207]},"counts_by_year":[],"updated_date":"2026-03-25T13:04:00.132906","created_date":"2026-03-21T00:00:00"}
