{"id":"https://openalex.org/W4405785096","doi":"https://doi.org/10.1109/iros58592.2024.10801766","title":"Visuo-Tactile Zero-Shot Object Recognition with Vision-Language Model","display_name":"Visuo-Tactile Zero-Shot Object Recognition with Vision-Language Model","publication_year":2024,"publication_date":"2024-10-14","ids":{"openalex":"https://openalex.org/W4405785096","doi":"https://doi.org/10.1109/iros58592.2024.10801766"},"language":"en","primary_location":{"id":"doi:10.1109/iros58592.2024.10801766","is_oa":false,"landing_page_url":"https://doi.org/10.1109/iros58592.2024.10801766","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2024 IEEE/RSJ International Conference on Intelligent Robots and Systems (IROS)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5090580362","display_name":"Shiori Ueda","orcid":null},"institutions":[{"id":"https://openalex.org/I203951103","display_name":"Keio University","ror":"https://ror.org/02kn6nx58","country_code":"JP","type":"education","lineage":["https://openalex.org/I203951103"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Shiori Ueda","raw_affiliation_strings":["Keio University,Yokohama,Japan,223-8522"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Keio University,Yokohama,Japan,223-8522","institution_ids":["https://openalex.org/I203951103"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5038408644","display_name":"Atsushi Hashimoto","orcid":"https://orcid.org/0000-0002-0799-4269"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Atsushi Hashimoto","raw_affiliation_strings":["OM-RON SINIC X Corporation,Bunkyo-Ku, Tokyo,Japan"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"OM-RON SINIC X Corporation,Bunkyo-Ku, Tokyo,Japan","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5034718334","display_name":"Masashi Hamaya","orcid":"https://orcid.org/0000-0003-4189-8219"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Masashi Hamaya","raw_affiliation_strings":["OM-RON SINIC X Corporation,Bunkyo-Ku, Tokyo,Japan"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"OM-RON SINIC X Corporation,Bunkyo-Ku, Tokyo,Japan","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5045607922","display_name":"Kazutoshi Tanaka","orcid":"https://orcid.org/0000-0003-0880-9333"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Kazutoshi Tanaka","raw_affiliation_strings":["OM-RON SINIC X Corporation,Bunkyo-Ku, Tokyo,Japan"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"OM-RON SINIC X Corporation,Bunkyo-Ku, Tokyo,Japan","institution_ids":[]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5005819073","display_name":"Hideo Sait\u00f4","orcid":"https://orcid.org/0000-0002-2421-9862"},"institutions":[{"id":"https://openalex.org/I203951103","display_name":"Keio University","ror":"https://ror.org/02kn6nx58","country_code":"JP","type":"education","lineage":["https://openalex.org/I203951103"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Hideo Saito","raw_affiliation_strings":["Keio University,Yokohama,Japan,223-8522"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Keio University,Yokohama,Japan,223-8522","institution_ids":["https://openalex.org/I203951103"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.3052,"has_fulltext":false,"cited_by_count":1,"citation_normalized_percentile":{"value":0.66009425,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":91,"max":95},"biblio":{"volume":null,"issue":null,"first_page":"7243","last_page":"7250"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12111","display_name":"Industrial Vision Systems and Defect Detection","score":0.9688000082969666,"subfield":{"id":"https://openalex.org/subfields/2209","display_name":"Industrial and Manufacturing Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T12111","display_name":"Industrial Vision Systems and Defect Detection","score":0.9688000082969666,"subfield":{"id":"https://openalex.org/subfields/2209","display_name":"Industrial and Manufacturing Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10914","display_name":"Tactile and Sensory Interactions","score":0.9610000252723694,"subfield":{"id":"https://openalex.org/subfields/2805","display_name":"Cognitive Neuroscience"},"field":{"id":"https://openalex.org/fields/28","display_name":"Neuroscience"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},{"id":"https://openalex.org/T11605","display_name":"Visual Attention and Saliency Detection","score":0.9409000277519226,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7391297817230225},{"id":"https://openalex.org/keywords/zero","display_name":"Zero (linguistics)","score":0.5901973843574524},{"id":"https://openalex.org/keywords/object","display_name":"Object (grammar)","score":0.5583004355430603},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5562423467636108},{"id":"https://openalex.org/keywords/cognitive-neuroscience-of-visual-object-recognition","display_name":"Cognitive neuroscience of visual object recognition","score":0.5461030602455139},{"id":"https://openalex.org/keywords/computer-vision","display_name":"Computer vision","score":0.5326105356216431},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.4394301772117615},{"id":"https://openalex.org/keywords/shot","display_name":"Shot (pellet)","score":0.4256320595741272},{"id":"https://openalex.org/keywords/linguistics","display_name":"Linguistics","score":0.056556493043899536},{"id":"https://openalex.org/keywords/materials-science","display_name":"Materials science","score":0.05067858099937439}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7391297817230225},{"id":"https://openalex.org/C2780813799","wikidata":"https://www.wikidata.org/wiki/Q3274237","display_name":"Zero (linguistics)","level":2,"score":0.5901973843574524},{"id":"https://openalex.org/C2781238097","wikidata":"https://www.wikidata.org/wiki/Q175026","display_name":"Object (grammar)","level":2,"score":0.5583004355430603},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5562423467636108},{"id":"https://openalex.org/C64876066","wikidata":"https://www.wikidata.org/wiki/Q5141226","display_name":"Cognitive neuroscience of visual object recognition","level":3,"score":0.5461030602455139},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.5326105356216431},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.4394301772117615},{"id":"https://openalex.org/C2778344882","wikidata":"https://www.wikidata.org/wiki/Q278938","display_name":"Shot (pellet)","level":2,"score":0.4256320595741272},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.056556493043899536},{"id":"https://openalex.org/C192562407","wikidata":"https://www.wikidata.org/wiki/Q228736","display_name":"Materials science","level":0,"score":0.05067858099937439},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C191897082","wikidata":"https://www.wikidata.org/wiki/Q11467","display_name":"Metallurgy","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/iros58592.2024.10801766","is_oa":false,"landing_page_url":"https://doi.org/10.1109/iros58592.2024.10801766","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2024 IEEE/RSJ International Conference on Intelligent Robots and Systems (IROS)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/10","score":0.46000000834465027,"display_name":"Reduced inequalities"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":32,"referenced_works":["https://openalex.org/W2068882612","https://openalex.org/W2075654868","https://openalex.org/W2075673359","https://openalex.org/W2166150182","https://openalex.org/W2171130677","https://openalex.org/W2344531169","https://openalex.org/W2566181476","https://openalex.org/W2567050476","https://openalex.org/W2607241646","https://openalex.org/W2752796333","https://openalex.org/W2783126232","https://openalex.org/W2802773536","https://openalex.org/W2906578938","https://openalex.org/W2952866402","https://openalex.org/W2963654998","https://openalex.org/W2963886462","https://openalex.org/W2967155559","https://openalex.org/W2967540821","https://openalex.org/W3176573865","https://openalex.org/W3182705783","https://openalex.org/W4205996330","https://openalex.org/W4384818237","https://openalex.org/W4386160289","https://openalex.org/W4387805864","https://openalex.org/W4390244622","https://openalex.org/W4392172801","https://openalex.org/W4402726993","https://openalex.org/W6679436768","https://openalex.org/W6744627333","https://openalex.org/W6851592950","https://openalex.org/W6853035495","https://openalex.org/W6861483881"],"related_works":["https://openalex.org/W2074502265","https://openalex.org/W4214877189","https://openalex.org/W2773965352","https://openalex.org/W2114275278","https://openalex.org/W1489511283","https://openalex.org/W2974914859","https://openalex.org/W2026565050","https://openalex.org/W2110244802","https://openalex.org/W2163728705","https://openalex.org/W949345935"],"abstract_inverted_index":{"Tactile":[0],"perception":[1],"is":[2],"vital,":[3],"especially":[4],"when":[5],"distinguishing":[6],"visually":[7],"similar":[8,45],"objects.":[9,46],"We":[10],"propose":[11],"an":[12],"approach":[13,29],"to":[14,36,71,98],"incorporate":[15],"tactile":[16,38,51,64],"data":[17,52],"into":[18,53],"a":[19,54],"Vision-Language":[20],"Model":[21],"(VLM)":[22],"for":[23,62],"visuo-tactile":[24],"zero-shot":[25,32],"object":[26,60],"recognition.":[27],"Our":[28],"leverages":[30],"the":[31,41,84],"capability":[33],"of":[34,43],"VLMs":[35],"infer":[37],"properties":[39],"from":[40],"names":[42,61],"tactilely":[44],"The":[47,78],"proposed":[48,79],"method":[49,80],"translates":[50],"textual":[55],"description":[56],"solely":[57],"by":[58,100],"annotating":[59],"each":[63],"sequence":[65],"during":[66],"training,":[67],"making":[68],"it":[69],"adaptable":[70],"various":[72],"contexts":[73],"with":[74],"low":[75],"training":[76],"costs.":[77],"was":[81],"evaluated":[82],"on":[83],"FoodReplica":[85],"and":[86],"Cube":[87],"datasets,":[88],"demonstrating":[89],"its":[90],"effectiveness":[91],"in":[92],"recognizing":[93],"objects":[94],"that":[95],"are":[96],"difficult":[97],"distinguish":[99],"vision":[101],"alone.":[102]},"counts_by_year":[{"year":2025,"cited_by_count":1}],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2025-10-10T00:00:00"}
