{"id":"https://openalex.org/W7136130211","doi":"https://doi.org/10.48550/arxiv.2603.12665","title":"TacVLA: Contact-Aware Tactile Fusion for Robust Vision-Language-Action Manipulation","display_name":"TacVLA: Contact-Aware Tactile Fusion for Robust Vision-Language-Action Manipulation","publication_year":2026,"publication_date":"2026-03-13","ids":{"openalex":"https://openalex.org/W7136130211","doi":"https://doi.org/10.48550/arxiv.2603.12665"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2603.12665","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.12665","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2603.12665","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5129484628","display_name":"Kaidi Zhang","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Zhang, Kaidi","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5129495820","display_name":"Heng Zhang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhang, Heng","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100583938","display_name":"Zhengtong Xu","orcid":"https://orcid.org/0000-0002-2789-1910"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Xu, Zhengtong","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5129637597","display_name":"Zhiyuan Zhang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhang, Zhiyuan","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5085716059","display_name":"Md Rakibul Islam Prince","orcid":"https://orcid.org/0000-0002-4095-7080"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Prince, Md Rakibul Islam","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5129579899","display_name":"Xiang Li","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Li, Xiang","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5129501175","display_name":"Xiaojing Han","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Han, Xiaojing","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5129436639","display_name":"Yuhao Zhou","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhou, Yuhao","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5129483593","display_name":"Arash Ajoudani","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Ajoudani, Arash","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5129523981","display_name":"Yu She","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"She, Yu","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":10,"corresponding_author_ids":["https://openalex.org/A5129484628"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10653","display_name":"Robot Manipulation and Learning","score":0.6348999738693237,"subfield":{"id":"https://openalex.org/subfields/2207","display_name":"Control and Systems Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10653","display_name":"Robot Manipulation and Learning","score":0.6348999738693237,"subfield":{"id":"https://openalex.org/subfields/2207","display_name":"Control and Systems Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.08590000122785568,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10338","display_name":"Advanced Sensor and Energy Harvesting Materials","score":0.059700001031160355,"subfield":{"id":"https://openalex.org/subfields/2204","display_name":"Biomedical Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/robustness","display_name":"Robustness (evolution)","score":0.6514999866485596},{"id":"https://openalex.org/keywords/modalities","display_name":"Modalities","score":0.4142000079154968},{"id":"https://openalex.org/keywords/transformer","display_name":"Transformer","score":0.4083999991416931},{"id":"https://openalex.org/keywords/fusion","display_name":"Fusion","score":0.3926999866962433},{"id":"https://openalex.org/keywords/fusion-mechanism","display_name":"Fusion mechanism","score":0.34869998693466187},{"id":"https://openalex.org/keywords/sensor-fusion","display_name":"Sensor fusion","score":0.31540000438690186},{"id":"https://openalex.org/keywords/robot","display_name":"Robot","score":0.3089999854564667}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.703499972820282},{"id":"https://openalex.org/C63479239","wikidata":"https://www.wikidata.org/wiki/Q7353546","display_name":"Robustness (evolution)","level":3,"score":0.6514999866485596},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6240000128746033},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.5741999745368958},{"id":"https://openalex.org/C2779903281","wikidata":"https://www.wikidata.org/wiki/Q6888026","display_name":"Modalities","level":2,"score":0.4142000079154968},{"id":"https://openalex.org/C66322947","wikidata":"https://www.wikidata.org/wiki/Q11658","display_name":"Transformer","level":3,"score":0.4083999991416931},{"id":"https://openalex.org/C158525013","wikidata":"https://www.wikidata.org/wiki/Q2593739","display_name":"Fusion","level":2,"score":0.3926999866962433},{"id":"https://openalex.org/C173414695","wikidata":"https://www.wikidata.org/wiki/Q5510276","display_name":"Fusion mechanism","level":4,"score":0.34869998693466187},{"id":"https://openalex.org/C33954974","wikidata":"https://www.wikidata.org/wiki/Q486494","display_name":"Sensor fusion","level":2,"score":0.31540000438690186},{"id":"https://openalex.org/C90509273","wikidata":"https://www.wikidata.org/wiki/Q11012","display_name":"Robot","level":2,"score":0.3089999854564667},{"id":"https://openalex.org/C2775960376","wikidata":"https://www.wikidata.org/wiki/Q1435859","display_name":"Grippers","level":2,"score":0.30140000581741333},{"id":"https://openalex.org/C2780226545","wikidata":"https://www.wikidata.org/wiki/Q6888030","display_name":"Modality (human\u2013computer interaction)","level":2,"score":0.30000001192092896},{"id":"https://openalex.org/C123657996","wikidata":"https://www.wikidata.org/wiki/Q12271","display_name":"Architecture","level":2,"score":0.27900001406669617},{"id":"https://openalex.org/C107457646","wikidata":"https://www.wikidata.org/wiki/Q207434","display_name":"Human\u2013computer interaction","level":1,"score":0.27300000190734863},{"id":"https://openalex.org/C46722567","wikidata":"https://www.wikidata.org/wiki/Q7674139","display_name":"Tactile sensor","level":3,"score":0.27059999108314514},{"id":"https://openalex.org/C168993435","wikidata":"https://www.wikidata.org/wiki/Q6501125","display_name":"Ground","level":2,"score":0.2639000117778778},{"id":"https://openalex.org/C133731056","wikidata":"https://www.wikidata.org/wiki/Q4917288","display_name":"Control engineering","level":1,"score":0.26100000739097595},{"id":"https://openalex.org/C5339829","wikidata":"https://www.wikidata.org/wiki/Q1425977","display_name":"Machine vision","level":2,"score":0.25949999690055847},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.25589999556541443}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2603.12665","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.12665","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2603.12665","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.12665","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Vision-Language-Action":[0],"(VLA)":[1],"models":[2],"have":[3],"demonstrated":[4],"significant":[5],"advantages":[6],"in":[7,22,127,130,136],"robotic":[8],"manipulation.":[9],"However,":[10],"their":[11],"reliance":[12],"on":[13,105],"vision":[14],"and":[15,29,86,110,133,146],"language":[16],"often":[17],"leads":[18],"to":[19,51,96],"suboptimal":[20],"performance":[21,121],"tasks":[23],"involving":[24],"visual":[25,139],"occlusion,":[26],"fine-grained":[27,53],"manipulation,":[28],"physical":[30],"contact.":[31],"To":[32],"address":[33],"these":[34],"challenges,":[35],"we":[36,57],"propose":[37],"TacVLA,":[38],"a":[39,59],"fine-tuned":[40],"VLA":[41],"model":[42,116],"by":[43,122],"incorporating":[44],"tactile":[45,66,80,87],"modalities":[46],"into":[47],"the":[48,93,120],"transformer-based":[49],"policy":[50],"enhance":[52],"manipulation":[54],"capabilities.":[55],"Specifically,":[56],"introduce":[58],"contact-aware":[60],"gating":[61],"mechanism":[62],"that":[63,114],"selectively":[64],"activates":[65],"tokens":[67,88],"only":[68],"when":[69],"contact":[70],"is":[71],"detected,":[72],"enabling":[73],"adaptive":[74],"multimodal":[75],"fusion":[76],"while":[77],"avoiding":[78],"irrelevant":[79],"interference.":[81],"The":[82],"fused":[83],"visual,":[84],"language,":[85],"are":[89,142],"jointly":[90],"processed":[91],"within":[92],"transformer":[94],"architecture":[95],"strengthen":[97],"cross-modal":[98],"grounding":[99],"during":[100],"contact-rich":[101],"interaction.":[102],"Extensive":[103],"experiments":[104],"constraint-locked":[106],"disassembly,":[107,128],"in-box":[108,131],"picking":[109,132],"robustness":[111],"evaluations":[112],"demonstrate":[113],"our":[115],"outperforms":[117],"baselines,":[118],"improving":[119],"averaging":[123],"20%":[124],"success":[125],"rate":[126],"60%":[129],"2.1x":[134],"improvement":[135],"scenarios":[137],"with":[138],"occlusion.":[140],"Videos":[141],"available":[143],"at":[144],"https://sites.google.com/view/tacvla":[145],"code":[147],"will":[148],"be":[149],"released.":[150]},"counts_by_year":[],"updated_date":"2026-03-17T07:05:13.627479","created_date":"2026-03-17T00:00:00"}
