{"id":"https://openalex.org/W7151500270","doi":"https://doi.org/10.48550/arxiv.2604.03322","title":"VitaTouch: Property-Aware Vision-Tactile-Language Model for Robotic Quality Inspection in Manufacturing","display_name":"VitaTouch: Property-Aware Vision-Tactile-Language Model for Robotic Quality Inspection in Manufacturing","publication_year":2026,"publication_date":"2026-04-02","ids":{"openalex":"https://openalex.org/W7151500270","doi":"https://doi.org/10.48550/arxiv.2604.03322"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2604.03322","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.03322","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2604.03322","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5128747151","display_name":"Junyi Zong","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zong, Junyi","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5133122231","display_name":"Qingxuan Jia","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Jia, Qingxuan","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5025657980","display_name":"Meixian Shi","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Shi, Meixian","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5133080094","display_name":"Tong Li","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Li, Tong","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5133129880","display_name":"Jiayuan Li","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Li, Jiayuan","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5125983655","display_name":"Zihang Lv","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Lv, Zihang","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5133095449","display_name":"Gang Chen","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Chen, Gang","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5133117693","display_name":"Fang Deng","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Deng, Fang","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":8,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.4034000039100647,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.4034000039100647,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11948","display_name":"Machine Learning in Materials Science","score":0.15279999375343323,"subfield":{"id":"https://openalex.org/subfields/2505","display_name":"Materials Chemistry"},"field":{"id":"https://openalex.org/fields/25","display_name":"Materials Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10338","display_name":"Advanced Sensor and Energy Harvesting Materials","score":0.09539999812841415,"subfield":{"id":"https://openalex.org/subfields/2204","display_name":"Biomedical Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.5960000157356262},{"id":"https://openalex.org/keywords/modality","display_name":"Modality (human\u2013computer interaction)","score":0.4756999909877777},{"id":"https://openalex.org/keywords/sorting","display_name":"Sorting","score":0.4697999954223633},{"id":"https://openalex.org/keywords/quality","display_name":"Quality (philosophy)","score":0.4431000053882599},{"id":"https://openalex.org/keywords/construct","display_name":"Construct (python library)","score":0.4246000051498413},{"id":"https://openalex.org/keywords/inference","display_name":"Inference","score":0.42329999804496765},{"id":"https://openalex.org/keywords/visual-inspection","display_name":"Visual inspection","score":0.3971000015735626},{"id":"https://openalex.org/keywords/prefix","display_name":"Prefix","score":0.38190001249313354},{"id":"https://openalex.org/keywords/encoder","display_name":"Encoder","score":0.367000013589859}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7354999780654907},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6521000266075134},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.5960000157356262},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.5629000067710876},{"id":"https://openalex.org/C2780226545","wikidata":"https://www.wikidata.org/wiki/Q6888030","display_name":"Modality (human\u2013computer interaction)","level":2,"score":0.4756999909877777},{"id":"https://openalex.org/C111696304","wikidata":"https://www.wikidata.org/wiki/Q2303697","display_name":"Sorting","level":2,"score":0.4697999954223633},{"id":"https://openalex.org/C2779530757","wikidata":"https://www.wikidata.org/wiki/Q1207505","display_name":"Quality (philosophy)","level":2,"score":0.4431000053882599},{"id":"https://openalex.org/C2780801425","wikidata":"https://www.wikidata.org/wiki/Q5164392","display_name":"Construct (python library)","level":2,"score":0.4246000051498413},{"id":"https://openalex.org/C2776214188","wikidata":"https://www.wikidata.org/wiki/Q408386","display_name":"Inference","level":2,"score":0.42329999804496765},{"id":"https://openalex.org/C168820333","wikidata":"https://www.wikidata.org/wiki/Q448889","display_name":"Visual inspection","level":2,"score":0.3971000015735626},{"id":"https://openalex.org/C141603448","wikidata":"https://www.wikidata.org/wiki/Q134830","display_name":"Prefix","level":2,"score":0.38190001249313354},{"id":"https://openalex.org/C118505674","wikidata":"https://www.wikidata.org/wiki/Q42586063","display_name":"Encoder","level":2,"score":0.367000013589859},{"id":"https://openalex.org/C5339829","wikidata":"https://www.wikidata.org/wiki/Q1425977","display_name":"Machine vision","level":2,"score":0.3522000014781952},{"id":"https://openalex.org/C17511633","wikidata":"https://www.wikidata.org/wiki/Q830694","display_name":"SMT placement equipment","level":3,"score":0.34940001368522644},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.34060001373291016},{"id":"https://openalex.org/C89600930","wikidata":"https://www.wikidata.org/wiki/Q1423946","display_name":"Segmentation","level":2,"score":0.3280999958515167},{"id":"https://openalex.org/C103278499","wikidata":"https://www.wikidata.org/wiki/Q254465","display_name":"Similarity (geometry)","level":3,"score":0.3278000056743622},{"id":"https://openalex.org/C90509273","wikidata":"https://www.wikidata.org/wiki/Q11012","display_name":"Robot","level":2,"score":0.3212999999523163},{"id":"https://openalex.org/C184337299","wikidata":"https://www.wikidata.org/wiki/Q1437428","display_name":"Semantics (computer science)","level":2,"score":0.32089999318122864},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.31459999084472656},{"id":"https://openalex.org/C165064840","wikidata":"https://www.wikidata.org/wiki/Q1321061","display_name":"Matching (statistics)","level":2,"score":0.3025999963283539},{"id":"https://openalex.org/C115901376","wikidata":"https://www.wikidata.org/wiki/Q184199","display_name":"Automation","level":2,"score":0.3019999861717224},{"id":"https://openalex.org/C2781238097","wikidata":"https://www.wikidata.org/wiki/Q175026","display_name":"Object (grammar)","level":2,"score":0.299699991941452},{"id":"https://openalex.org/C175154964","wikidata":"https://www.wikidata.org/wiki/Q380077","display_name":"Task analysis","level":3,"score":0.2872999906539917},{"id":"https://openalex.org/C199639397","wikidata":"https://www.wikidata.org/wiki/Q1788588","display_name":"Engineering drawing","level":1,"score":0.2849000096321106},{"id":"https://openalex.org/C52622490","wikidata":"https://www.wikidata.org/wiki/Q1026626","display_name":"Feature extraction","level":2,"score":0.2775999903678894},{"id":"https://openalex.org/C40149104","wikidata":"https://www.wikidata.org/wiki/Q5620977","display_name":"Factory (object-oriented programming)","level":2,"score":0.26660001277923584},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.26089999079704285},{"id":"https://openalex.org/C34413123","wikidata":"https://www.wikidata.org/wiki/Q170978","display_name":"Robotics","level":3,"score":0.25769999623298645},{"id":"https://openalex.org/C55020928","wikidata":"https://www.wikidata.org/wiki/Q3813865","display_name":"Image quality","level":3,"score":0.25600001215934753},{"id":"https://openalex.org/C168167062","wikidata":"https://www.wikidata.org/wiki/Q1117970","display_name":"Component (thermodynamics)","level":2,"score":0.25540000200271606}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2604.03322","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.03322","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2604.03322","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.03322","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[{"score":0.6913711428642273,"display_name":"Quality Education","id":"https://metadata.un.org/sdg/4"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Quality":[0],"inspection":[1],"in":[2,167],"smart":[3],"manufacturing":[4],"requires":[5],"identifying":[6],"intrinsic":[7],"material":[8],"and":[9,22,34,42,50,70,74,91,103,123,145,151,156,162],"surface":[10],"properties":[11],"beyond":[12],"visible":[13],"geometry,":[14],"yet":[15],"vision-only":[16],"methods":[17],"remain":[18],"vulnerable":[19],"to":[20,46],"occlusion":[21],"reflection.":[23],"We":[24,64,79],"propose":[25],"VitaTouch,":[26],"a":[27,43,60,83,132],"property-aware":[28],"vision-tactile-language":[29],"model":[30],"for":[31,59,148],"material-property":[32],"inference":[33],"natural-language":[35],"attribute":[36],"description.":[37],"VitaTouch":[38,96,141],"uses":[39],"modality-specific":[40],"encoders":[41],"dual":[44],"Q-Former":[45],"extract":[47],"language-relevant":[48],"visual":[49],"tactile":[51],"features,":[52],"which":[53],"are":[54,174],"compressed":[55],"into":[56],"prefix":[57],"tokens":[58],"large":[61],"language":[62],"model.":[63],"align":[65],"each":[66],"modality":[67],"with":[68,86],"text":[69],"explicitly":[71],"couple":[72],"vision":[73],"touch":[75],"through":[76],"contrastive":[77],"learning.":[78],"also":[80],"construct":[81],"VitaSet,":[82,114],"multimodal":[84],"dataset":[85],"186":[87],"objects,":[88],"52k":[89],"images,":[90],"5.1k":[92],"human-verified":[93],"instruction-answer":[94],"pairs.":[95],"achieves":[97,131],"the":[98,104,127,177],"best":[99],"performance":[100],"on":[101,111],"HCT":[102],"overall":[105],"TVL":[106],"benchmark,":[107],"while":[108],"remaining":[109],"competitive":[110],"SSVTP.":[112],"On":[113],"it":[115],"reaches":[116],"88.89%":[117],"hardness":[118],"accuracy,":[119,122],"75.13%":[120],"roughness":[121],"54.81%":[124],"descriptor":[125],"recall;":[126],"material-description":[128],"task":[129],"further":[130],"peak":[133],"semantic":[134],"similarity":[135],"of":[136],"0.9009.":[137],"With":[138],"LoRA-based":[139],"fine-tuning,":[140],"attains":[142],"100.0%,":[143],"96.0%,":[144],"92.0%":[146],"accuracy":[147,161],"2-,":[149],"3-,":[150],"5-category":[152],"defect":[153],"recognition,":[154],"respectively,":[155],"delivers":[157],"94.0%":[158,163],"closed-loop":[159],"recognition":[160],"end-to-end":[164],"sorting":[165],"success":[166],"100":[168],"laboratory":[169],"robotic":[170],"trials.":[171],"More":[172],"details":[173],"available":[175],"at":[176],"project":[178],"page:":[179],"https://vitatouch.github.io/":[180]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2026-04-08T00:00:00"}
