{"id":"https://openalex.org/W4405786613","doi":"https://doi.org/10.1109/iros58592.2024.10801725","title":"Subtle-Diff: A Dataset for Precise Recognition of Subtle Differences Among Visually Similar Objects","display_name":"Subtle-Diff: A Dataset for Precise Recognition of Subtle Differences Among Visually Similar Objects","publication_year":2024,"publication_date":"2024-10-14","ids":{"openalex":"https://openalex.org/W4405786613","doi":"https://doi.org/10.1109/iros58592.2024.10801725"},"language":"en","primary_location":{"id":"doi:10.1109/iros58592.2024.10801725","is_oa":false,"landing_page_url":"https://doi.org/10.1109/iros58592.2024.10801725","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2024 IEEE/RSJ International Conference on Intelligent Robots and Systems (IROS)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5059202273","display_name":"Fumiya Matsuzawa","orcid":"https://orcid.org/0000-0002-6663-2049"},"institutions":[{"id":"https://openalex.org/I73613424","display_name":"National Institute of Advanced Industrial Science and Technology","ror":"https://ror.org/01703db54","country_code":"JP","type":"government","lineage":["https://openalex.org/I73613424"]}],"countries":["JP"],"is_corresponding":true,"raw_author_name":"Fumiya Matsuzawa","raw_affiliation_strings":["National Institute of Advanced Industrial Science and Technology (AIST),Faculty of Artificial Intelligence Research Center,Japan"],"affiliations":[{"raw_affiliation_string":"National Institute of Advanced Industrial Science and Technology (AIST),Faculty of Artificial Intelligence Research Center,Japan","institution_ids":["https://openalex.org/I73613424"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5029961156","display_name":"Yue Qiu","orcid":"https://orcid.org/0000-0002-2181-9475"},"institutions":[{"id":"https://openalex.org/I73613424","display_name":"National Institute of Advanced Industrial Science and Technology","ror":"https://ror.org/01703db54","country_code":"JP","type":"government","lineage":["https://openalex.org/I73613424"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Yue Qiu","raw_affiliation_strings":["National Institute of Advanced Industrial Science and Technology (AIST),Faculty of Artificial Intelligence Research Center,Japan"],"affiliations":[{"raw_affiliation_string":"National Institute of Advanced Industrial Science and Technology (AIST),Faculty of Artificial Intelligence Research Center,Japan","institution_ids":["https://openalex.org/I73613424"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5051470569","display_name":"Y. Sun","orcid":"https://orcid.org/0009-0003-1523-5689"},"institutions":[{"id":"https://openalex.org/I73613424","display_name":"National Institute of Advanced Industrial Science and Technology","ror":"https://ror.org/01703db54","country_code":"JP","type":"government","lineage":["https://openalex.org/I73613424"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Yanjun Sun","raw_affiliation_strings":["National Institute of Advanced Industrial Science and Technology (AIST),Faculty of Artificial Intelligence Research Center,Japan"],"affiliations":[{"raw_affiliation_string":"National Institute of Advanced Industrial Science and Technology (AIST),Faculty of Artificial Intelligence Research Center,Japan","institution_ids":["https://openalex.org/I73613424"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5060846465","display_name":"Kenji Iwata","orcid":"https://orcid.org/0000-0001-6609-8221"},"institutions":[{"id":"https://openalex.org/I73613424","display_name":"National Institute of Advanced Industrial Science and Technology","ror":"https://ror.org/01703db54","country_code":"JP","type":"government","lineage":["https://openalex.org/I73613424"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Kenji Iwata","raw_affiliation_strings":["National Institute of Advanced Industrial Science and Technology (AIST),Faculty of Artificial Intelligence Research Center,Japan"],"affiliations":[{"raw_affiliation_string":"National Institute of Advanced Industrial Science and Technology (AIST),Faculty of Artificial Intelligence Research Center,Japan","institution_ids":["https://openalex.org/I73613424"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5011507481","display_name":"Hirokatsu Kataoka","orcid":"https://orcid.org/0000-0001-8844-165X"},"institutions":[{"id":"https://openalex.org/I73613424","display_name":"National Institute of Advanced Industrial Science and Technology","ror":"https://ror.org/01703db54","country_code":"JP","type":"government","lineage":["https://openalex.org/I73613424"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Hirokatsu Kataoka","raw_affiliation_strings":["National Institute of Advanced Industrial Science and Technology (AIST),Faculty of Artificial Intelligence Research Center,Japan"],"affiliations":[{"raw_affiliation_string":"National Institute of Advanced Industrial Science and Technology (AIST),Faculty of Artificial Intelligence Research Center,Japan","institution_ids":["https://openalex.org/I73613424"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5043995369","display_name":"Yutaka Satoh","orcid":"https://orcid.org/0000-0002-0638-0855"},"institutions":[{"id":"https://openalex.org/I73613424","display_name":"National Institute of Advanced Industrial Science and Technology","ror":"https://ror.org/01703db54","country_code":"JP","type":"government","lineage":["https://openalex.org/I73613424"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Yutaka Satoh","raw_affiliation_strings":["National Institute of Advanced Industrial Science and Technology (AIST),Faculty of Artificial Intelligence Research Center,Japan"],"affiliations":[{"raw_affiliation_string":"National Institute of Advanced Industrial Science and Technology (AIST),Faculty of Artificial Intelligence Research Center,Japan","institution_ids":["https://openalex.org/I73613424"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5059202273"],"corresponding_institution_ids":["https://openalex.org/I73613424"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.36166188,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"5888","last_page":"5894"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12111","display_name":"Industrial Vision Systems and Defect Detection","score":0.8015999794006348,"subfield":{"id":"https://openalex.org/subfields/2209","display_name":"Industrial and Manufacturing Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T12111","display_name":"Industrial Vision Systems and Defect Detection","score":0.8015999794006348,"subfield":{"id":"https://openalex.org/subfields/2209","display_name":"Industrial and Manufacturing Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6748256683349609},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5826717019081116},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.4762853980064392},{"id":"https://openalex.org/keywords/cognitive-neuroscience-of-visual-object-recognition","display_name":"Cognitive neuroscience of visual object recognition","score":0.4493817985057831},{"id":"https://openalex.org/keywords/computer-vision","display_name":"Computer vision","score":0.41748204827308655},{"id":"https://openalex.org/keywords/feature-extraction","display_name":"Feature extraction","score":0.36007121205329895}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6748256683349609},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5826717019081116},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.4762853980064392},{"id":"https://openalex.org/C64876066","wikidata":"https://www.wikidata.org/wiki/Q5141226","display_name":"Cognitive neuroscience of visual object recognition","level":3,"score":0.4493817985057831},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.41748204827308655},{"id":"https://openalex.org/C52622490","wikidata":"https://www.wikidata.org/wiki/Q1026626","display_name":"Feature extraction","level":2,"score":0.36007121205329895}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/iros58592.2024.10801725","is_oa":false,"landing_page_url":"https://doi.org/10.1109/iros58592.2024.10801725","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2024 IEEE/RSJ International Conference on Intelligent Robots and Systems (IROS)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/10","score":0.46000000834465027,"display_name":"Reduced inequalities"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":20,"referenced_works":["https://openalex.org/W1948251820","https://openalex.org/W1956340063","https://openalex.org/W2101105183","https://openalex.org/W2108598243","https://openalex.org/W2741631785","https://openalex.org/W2963530300","https://openalex.org/W2964196083","https://openalex.org/W2988981892","https://openalex.org/W3140300848","https://openalex.org/W3203247393","https://openalex.org/W4281633937","https://openalex.org/W4285265603","https://openalex.org/W4312261477","https://openalex.org/W4386065506","https://openalex.org/W4386076015","https://openalex.org/W4390872842","https://openalex.org/W4390873348","https://openalex.org/W4402716012","https://openalex.org/W6790019176","https://openalex.org/W6849177959"],"related_works":["https://openalex.org/W2772917594","https://openalex.org/W2036807459","https://openalex.org/W2058170566","https://openalex.org/W2755342338","https://openalex.org/W2166024367","https://openalex.org/W3116076068","https://openalex.org/W2229312674","https://openalex.org/W2951359407","https://openalex.org/W2079911747","https://openalex.org/W1969923398"],"abstract_inverted_index":{"Visual":[0],"inspection":[1,204],"robots":[2],"used":[3],"in":[4,51,141,174,195,202],"factories":[5],"and":[6,20,40,63,92,98,137,189,198,205],"outdoor":[7],"environments":[8],"require":[9],"the":[10,23,28,33,152,175],"ability":[11,153],"to":[12,26,30,67,154,193],"accurately":[13],"recognize":[14],"visual":[15,90,203],"differences":[16,29,54,91,118],"between":[17,135],"similar":[18],"objects":[19,197],"further":[21],"verbalize":[22],"recognition":[24,75,177],"results":[25],"present":[27],"humans.":[31],"Despite":[32],"application":[34],"of":[35,126],"Large":[36],"Language":[37],"Models":[38,102],"(LLMs)":[39],"multimodal":[41],"LLMs":[42,62],"across":[43,55,130],"various":[44],"domains,":[45],"our":[46],"research":[47],"highlights":[48],"their":[49,89],"insufficiency":[50],"verbalizing":[52],"nuanced":[53],"images.":[56],"To":[57],"address":[58],"this,":[59],"we":[60,163],"leveraged":[61],"image":[64,157],"generation":[65],"AI":[66],"develop":[68],"a":[69,93,165],"dataset":[70,188,209],"aimed":[71],"at":[72,212],"assessing":[73],"difference":[74,95,160,176],"capabilities.":[76],"We":[77],"introduced":[78],"two":[79],"novel":[80],"tasks":[81],"using":[82],"this":[83],"dataset:":[84],"selecting":[85],"images":[86],"based":[87],"on":[88,104],"conditional":[94],"captioning":[96],"task,":[97,178],"evaluated":[99],"existing":[100,183],"Vision-Language":[101],"(VLMs)":[103],"these":[105],"tasks.":[106],"Our":[107,187],"findings":[108,190],"reveal":[109],"that":[110,147,168],"advanced":[111],"models":[112,150],"like":[113],"GPT-4V":[114],"can":[115],"describe":[116],"subtle":[117],"with":[119],"comparative":[120],"expressions,":[121],"yet":[122],"they":[123],"fall":[124],"short":[125],"matching":[127],"human":[128,138],"performance":[129,181],"all":[131],"attributes.":[132],"This":[133],"discrepancy":[134],"model":[136,167],"recognition,":[139],"especially":[140],"identifying":[142],"easily":[143],"discernible":[144],"differences,":[145],"suggests":[146],"most":[148],"current":[149],"lack":[151],"directly":[155],"compare":[156],"pairs":[158],"for":[159],"detection.":[161],"Consequently,":[162],"propose":[164],"new":[166],"incorporates":[169],"an":[170],"image-text":[171],"similarity":[172],"approach":[173],"showing":[179],"superior":[180],"over":[182],"models,":[184],"including":[185],"GPT-4V.":[186],"will":[191],"contribute":[192],"advancements":[194],"differencing":[196],"improve":[199],"robotic":[200],"applications":[201],"object":[206],"picking.":[207],"The":[208],"is":[210],"available":[211],"DICTA":[213],"challenge":[214],"page.":[215]},"counts_by_year":[],"updated_date":"2025-12-27T23:08:20.325037","created_date":"2025-10-10T00:00:00"}
