{"id":"https://openalex.org/W4416749610","doi":"https://doi.org/10.1109/iros60139.2025.11245928","title":"ConViTac: Aligning Visual-Tactile Fusion with Contrastive Representations","display_name":"ConViTac: Aligning Visual-Tactile Fusion with Contrastive Representations","publication_year":2025,"publication_date":"2025-10-19","ids":{"openalex":"https://openalex.org/W4416749610","doi":"https://doi.org/10.1109/iros60139.2025.11245928"},"language":null,"primary_location":{"id":"doi:10.1109/iros60139.2025.11245928","is_oa":false,"landing_page_url":"https://doi.org/10.1109/iros60139.2025.11245928","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 IEEE/RSJ International Conference on Intelligent Robots and Systems (IROS)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5101698318","display_name":"Zhiyuan Wu","orcid":"https://orcid.org/0000-0003-4746-2168"},"institutions":[{"id":"https://openalex.org/I183935753","display_name":"King's College London","ror":"https://ror.org/0220mzb33","country_code":"GB","type":"education","lineage":["https://openalex.org/I124357947","https://openalex.org/I183935753"]}],"countries":["GB"],"is_corresponding":true,"raw_author_name":"Zhiyuan Wu","raw_affiliation_strings":["King&#x2019;s College London, Strand,Department of Engineering,London,United Kingdom,WC2R 2LS"],"affiliations":[{"raw_affiliation_string":"King&#x2019;s College London, Strand,Department of Engineering,London,United Kingdom,WC2R 2LS","institution_ids":["https://openalex.org/I183935753"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5073503029","display_name":"Yongqiang Zhao","orcid":"https://orcid.org/0000-0002-6974-7327"},"institutions":[{"id":"https://openalex.org/I183935753","display_name":"King's College London","ror":"https://ror.org/0220mzb33","country_code":"GB","type":"education","lineage":["https://openalex.org/I124357947","https://openalex.org/I183935753"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Yongqiang Zhao","raw_affiliation_strings":["King&#x2019;s College London, Strand,Department of Engineering,London,United Kingdom,WC2R 2LS"],"affiliations":[{"raw_affiliation_string":"King&#x2019;s College London, Strand,Department of Engineering,London,United Kingdom,WC2R 2LS","institution_ids":["https://openalex.org/I183935753"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5012646628","display_name":"Shan Luo","orcid":"https://orcid.org/0000-0003-4760-0372"},"institutions":[{"id":"https://openalex.org/I183935753","display_name":"King's College London","ror":"https://ror.org/0220mzb33","country_code":"GB","type":"education","lineage":["https://openalex.org/I124357947","https://openalex.org/I183935753"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Shan Luo","raw_affiliation_strings":["King&#x2019;s College London, Strand,Department of Engineering,London,United Kingdom,WC2R 2LS"],"affiliations":[{"raw_affiliation_string":"King&#x2019;s College London, Strand,Department of Engineering,London,United Kingdom,WC2R 2LS","institution_ids":["https://openalex.org/I183935753"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5101698318"],"corresponding_institution_ids":["https://openalex.org/I183935753"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.38362676,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"8545","last_page":"8552"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10338","display_name":"Advanced Sensor and Energy Harvesting Materials","score":0.36079999804496765,"subfield":{"id":"https://openalex.org/subfields/2204","display_name":"Biomedical Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10338","display_name":"Advanced Sensor and Energy Harvesting Materials","score":0.36079999804496765,"subfield":{"id":"https://openalex.org/subfields/2204","display_name":"Biomedical Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10653","display_name":"Robot Manipulation and Learning","score":0.3257000148296356,"subfield":{"id":"https://openalex.org/subfields/2207","display_name":"Control and Systems Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10914","display_name":"Tactile and Sensory Interactions","score":0.1762000024318695,"subfield":{"id":"https://openalex.org/subfields/2805","display_name":"Cognitive Neuroscience"},"field":{"id":"https://openalex.org/fields/28","display_name":"Neuroscience"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/feature","display_name":"Feature (linguistics)","score":0.6463000178337097},{"id":"https://openalex.org/keywords/feature-learning","display_name":"Feature learning","score":0.6085000038146973},{"id":"https://openalex.org/keywords/representation","display_name":"Representation (politics)","score":0.5331000089645386},{"id":"https://openalex.org/keywords/modality","display_name":"Modality (human\u2013computer interaction)","score":0.527899980545044},{"id":"https://openalex.org/keywords/encoder","display_name":"Encoder","score":0.5246000289916992},{"id":"https://openalex.org/keywords/embedding","display_name":"Embedding","score":0.5164999961853027},{"id":"https://openalex.org/keywords/modalities","display_name":"Modalities","score":0.4359999895095825},{"id":"https://openalex.org/keywords/key","display_name":"Key (lock)","score":0.4341000020503998}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7450000047683716},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6931999921798706},{"id":"https://openalex.org/C2776401178","wikidata":"https://www.wikidata.org/wiki/Q12050496","display_name":"Feature (linguistics)","level":2,"score":0.6463000178337097},{"id":"https://openalex.org/C59404180","wikidata":"https://www.wikidata.org/wiki/Q17013334","display_name":"Feature learning","level":2,"score":0.6085000038146973},{"id":"https://openalex.org/C2776359362","wikidata":"https://www.wikidata.org/wiki/Q2145286","display_name":"Representation (politics)","level":3,"score":0.5331000089645386},{"id":"https://openalex.org/C2780226545","wikidata":"https://www.wikidata.org/wiki/Q6888030","display_name":"Modality (human\u2013computer interaction)","level":2,"score":0.527899980545044},{"id":"https://openalex.org/C118505674","wikidata":"https://www.wikidata.org/wiki/Q42586063","display_name":"Encoder","level":2,"score":0.5246000289916992},{"id":"https://openalex.org/C41608201","wikidata":"https://www.wikidata.org/wiki/Q980509","display_name":"Embedding","level":2,"score":0.5164999961853027},{"id":"https://openalex.org/C2779903281","wikidata":"https://www.wikidata.org/wiki/Q6888026","display_name":"Modalities","level":2,"score":0.4359999895095825},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.4341000020503998},{"id":"https://openalex.org/C2776502983","wikidata":"https://www.wikidata.org/wiki/Q690182","display_name":"Contrast (vision)","level":2,"score":0.4185999929904938},{"id":"https://openalex.org/C26760741","wikidata":"https://www.wikidata.org/wiki/Q160402","display_name":"Perception","level":2,"score":0.41339999437332153},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.3580999970436096},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.34299999475479126},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.3393000066280365},{"id":"https://openalex.org/C2780660688","wikidata":"https://www.wikidata.org/wiki/Q25052564","display_name":"Multimodal learning","level":2,"score":0.32580000162124634},{"id":"https://openalex.org/C173414695","wikidata":"https://www.wikidata.org/wiki/Q5510276","display_name":"Fusion mechanism","level":4,"score":0.31790000200271606},{"id":"https://openalex.org/C101738243","wikidata":"https://www.wikidata.org/wiki/Q786435","display_name":"Autoencoder","level":3,"score":0.305400013923645},{"id":"https://openalex.org/C125411270","wikidata":"https://www.wikidata.org/wiki/Q18653","display_name":"Encoding (memory)","level":2,"score":0.30070000886917114},{"id":"https://openalex.org/C52622490","wikidata":"https://www.wikidata.org/wiki/Q1026626","display_name":"Feature extraction","level":2,"score":0.2930000126361847},{"id":"https://openalex.org/C108583219","wikidata":"https://www.wikidata.org/wiki/Q197536","display_name":"Deep learning","level":2,"score":0.2840000092983246},{"id":"https://openalex.org/C158525013","wikidata":"https://www.wikidata.org/wiki/Q2593739","display_name":"Fusion","level":2,"score":0.2827000021934509},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.2745000123977661}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/iros60139.2025.11245928","is_oa":false,"landing_page_url":"https://doi.org/10.1109/iros60139.2025.11245928","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 IEEE/RSJ International Conference on Intelligent Robots and Systems (IROS)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":26,"referenced_works":["https://openalex.org/W2070612690","https://openalex.org/W2102544846","https://openalex.org/W2159295847","https://openalex.org/W2194775991","https://openalex.org/W2775635818","https://openalex.org/W2962858109","https://openalex.org/W2962983231","https://openalex.org/W2963263790","https://openalex.org/W3035524453","https://openalex.org/W3044483011","https://openalex.org/W3093327960","https://openalex.org/W3094502228","https://openalex.org/W3129699689","https://openalex.org/W3133478370","https://openalex.org/W4214689917","https://openalex.org/W4312933868","https://openalex.org/W4322627950","https://openalex.org/W4385430681","https://openalex.org/W4386076327","https://openalex.org/W4391128608","https://openalex.org/W4393132157","https://openalex.org/W4401414089","https://openalex.org/W4401414358","https://openalex.org/W4402726993","https://openalex.org/W4405786423","https://openalex.org/W4413925458"],"related_works":[],"abstract_inverted_index":{"Vision":[0],"and":[1,16,45,104,129,152,170],"touch":[2],"are":[3,113,176],"two":[4],"fundamental":[5],"sensory":[6],"modalities":[7],"for":[8,47],"robots,":[9],"offering":[10],"complementary":[11],"information":[12],"that":[13,91],"enhances":[14],"perception":[15],"manipulation":[17],"tasks.":[18,134,173],"Previous":[19],"research":[20],"has":[21],"attempted":[22],"to":[23,28,52,70,101,115,139,165],"jointly":[24],"learn":[25],"visual-tactile":[26,65,117],"representations":[27,128],"extract":[29],"more":[30],"meaningful":[31],"information.":[32],"However,":[33],"these":[34],"approaches":[35],"often":[36],"rely":[37],"on":[38,132,177],"direct":[39],"combination,":[40],"such":[41],"as":[42],"feature":[43,56,118],"addition":[44],"concatenation,":[46],"modality":[48],"fusion,":[49],"which":[50,160],"tend":[51],"result":[53],"in":[54,145,167],"poor":[55],"integration.":[57],"In":[58],"this":[59],"paper,":[60],"we":[61],"propose":[62],"ConViTac,":[63],"a":[64,85,93],"representation":[66],"learning":[67,100],"network":[68],"designed":[69],"enhance":[71],"the":[72,126,141,153],"alignment":[73],"of":[74,143,155],"features":[75],"during":[76],"fusion":[77,119],"using":[78],"contrastive":[79,94,99],"representations.":[80],"Our":[81],"key":[82],"contribution":[83],"is":[84],"Contrastive":[86],"Embedding":[87],"Conditioning":[88],"(CEC)":[89],"mechanism":[90],"leverages":[92],"encoder":[95],"pretrained":[96],"through":[97,120],"self-supervised":[98],"project":[102,179],"visual":[103],"tactile":[105],"inputs":[106],"into":[107],"unified":[108,127],"latent":[109],"embeddings.":[110],"These":[111],"embeddings":[112],"used":[114],"couple":[116],"cross-modal":[121],"attention,":[122],"aiming":[123],"at":[124],"aligning":[125],"enhancing":[130],"performance":[131],"downstream":[133],"We":[135],"conduct":[136],"extensive":[137],"experiments":[138],"demonstrate":[140],"superiority":[142],"ConViTac":[144],"real":[146],"world":[147],"over":[148],"current":[149],"state-of-the-art":[150],"methods":[151],"effectiveness":[154],"our":[156,178],"proposed":[157],"CEC":[158],"mechanism,":[159],"improves":[161],"accuracy":[162],"by":[163],"up":[164],"12.0%":[166],"material":[168],"classification":[169],"grasping":[171],"prediction":[172],"More":[174],"details":[175],"website.":[180]},"counts_by_year":[],"updated_date":"2026-03-07T16:01:11.037858","created_date":"2025-11-28T00:00:00"}
