{"id":"https://openalex.org/W7131103794","doi":"https://doi.org/10.1109/iccvw69036.2025.00153","title":"InspectVLM: Unified in Theory, Unreliable in Practice","display_name":"InspectVLM: Unified in Theory, Unreliable in Practice","publication_year":2025,"publication_date":"2025-10-19","ids":{"openalex":"https://openalex.org/W7131103794","doi":"https://doi.org/10.1109/iccvw69036.2025.00153"},"language":null,"primary_location":{"id":"doi:10.1109/iccvw69036.2025.00153","is_oa":false,"landing_page_url":"https://doi.org/10.1109/iccvw69036.2025.00153","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 IEEE/CVF International Conference on Computer Vision Workshops (ICCVW)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5126622283","display_name":"Conor Wallace","orcid":null},"institutions":[{"id":"https://openalex.org/I2799881413","display_name":"Zen-Noh (Japan)","ror":"https://ror.org/05cwghy20","country_code":"JP","type":"company","lineage":["https://openalex.org/I2799881413"]}],"countries":["JP"],"is_corresponding":true,"raw_author_name":"Conor Wallace","raw_affiliation_strings":["Zeitview,India"],"affiliations":[{"raw_affiliation_string":"Zeitview,India","institution_ids":["https://openalex.org/I2799881413"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5035385486","display_name":"Isaac Corley","orcid":"https://orcid.org/0000-0002-9273-7303"},"institutions":[{"id":"https://openalex.org/I2799881413","display_name":"Zen-Noh (Japan)","ror":"https://ror.org/05cwghy20","country_code":"JP","type":"company","lineage":["https://openalex.org/I2799881413"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Isaac Corley","raw_affiliation_strings":["Zeitview,India"],"affiliations":[{"raw_affiliation_string":"Zeitview,India","institution_ids":["https://openalex.org/I2799881413"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5055672869","display_name":"Jonathan Lwowski","orcid":null},"institutions":[{"id":"https://openalex.org/I2799881413","display_name":"Zen-Noh (Japan)","ror":"https://ror.org/05cwghy20","country_code":"JP","type":"company","lineage":["https://openalex.org/I2799881413"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Jonathan Lwowski","raw_affiliation_strings":["Zeitview,India"],"affiliations":[{"raw_affiliation_string":"Zeitview,India","institution_ids":["https://openalex.org/I2799881413"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5126622283"],"corresponding_institution_ids":["https://openalex.org/I2799881413"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.88546233,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"1435","last_page":"1443"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.09470000118017197,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.09470000118017197,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T13398","display_name":"Data Analysis with R","score":0.02979999966919422,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10799","display_name":"Data Visualization and Analytics","score":0.024000000208616257,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/unification","display_name":"Unification","score":0.6053000092506409},{"id":"https://openalex.org/keywords/robustness","display_name":"Robustness (evolution)","score":0.5257999897003174},{"id":"https://openalex.org/keywords/visualization","display_name":"Visualization","score":0.44999998807907104},{"id":"https://openalex.org/keywords/software-deployment","display_name":"Software deployment","score":0.4223000109195709},{"id":"https://openalex.org/keywords/unified-model","display_name":"Unified Model","score":0.3637999892234802},{"id":"https://openalex.org/keywords/visual-reasoning","display_name":"Visual reasoning","score":0.3449999988079071},{"id":"https://openalex.org/keywords/object","display_name":"Object (grammar)","score":0.32170000672340393},{"id":"https://openalex.org/keywords/cognitive-reframing","display_name":"Cognitive reframing","score":0.3192000091075897}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6431000232696533},{"id":"https://openalex.org/C96146094","wikidata":"https://www.wikidata.org/wiki/Q609057","display_name":"Unification","level":2,"score":0.6053000092506409},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5820000171661377},{"id":"https://openalex.org/C63479239","wikidata":"https://www.wikidata.org/wiki/Q7353546","display_name":"Robustness (evolution)","level":3,"score":0.5257999897003174},{"id":"https://openalex.org/C36464697","wikidata":"https://www.wikidata.org/wiki/Q451553","display_name":"Visualization","level":2,"score":0.44999998807907104},{"id":"https://openalex.org/C105339364","wikidata":"https://www.wikidata.org/wiki/Q2297740","display_name":"Software deployment","level":2,"score":0.4223000109195709},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.3743000030517578},{"id":"https://openalex.org/C45493050","wikidata":"https://www.wikidata.org/wiki/Q7884934","display_name":"Unified Model","level":2,"score":0.3637999892234802},{"id":"https://openalex.org/C2777508537","wikidata":"https://www.wikidata.org/wiki/Q7936620","display_name":"Visual reasoning","level":2,"score":0.3449999988079071},{"id":"https://openalex.org/C2781238097","wikidata":"https://www.wikidata.org/wiki/Q175026","display_name":"Object (grammar)","level":2,"score":0.32170000672340393},{"id":"https://openalex.org/C187029079","wikidata":"https://www.wikidata.org/wiki/Q958679","display_name":"Cognitive reframing","level":2,"score":0.3192000091075897},{"id":"https://openalex.org/C152124472","wikidata":"https://www.wikidata.org/wiki/Q1204361","display_name":"Redundancy (engineering)","level":2,"score":0.30309998989105225},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.29910001158714294},{"id":"https://openalex.org/C2776214188","wikidata":"https://www.wikidata.org/wiki/Q408386","display_name":"Inference","level":2,"score":0.29789999127388},{"id":"https://openalex.org/C142816647","wikidata":"https://www.wikidata.org/wiki/Q5573018","display_name":"Glyph (data visualization)","level":3,"score":0.29750001430511475},{"id":"https://openalex.org/C147494362","wikidata":"https://www.wikidata.org/wiki/Q2078905","display_name":"Troubleshooting","level":2,"score":0.28029999136924744},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.2689000070095062},{"id":"https://openalex.org/C2164484","wikidata":"https://www.wikidata.org/wiki/Q5170150","display_name":"Core (optical fiber)","level":2,"score":0.2687999904155731},{"id":"https://openalex.org/C66024118","wikidata":"https://www.wikidata.org/wiki/Q1122506","display_name":"Computational model","level":2,"score":0.2685000002384186},{"id":"https://openalex.org/C172367668","wikidata":"https://www.wikidata.org/wiki/Q6504956","display_name":"Data visualization","level":3,"score":0.26249998807907104},{"id":"https://openalex.org/C2776151529","wikidata":"https://www.wikidata.org/wiki/Q3045304","display_name":"Object detection","level":3,"score":0.2572999894618988},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.25600001215934753}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/iccvw69036.2025.00153","is_oa":false,"landing_page_url":"https://doi.org/10.1109/iccvw69036.2025.00153","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 IEEE/CVF International Conference on Computer Vision Workshops (ICCVW)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":12,"referenced_works":["https://openalex.org/W639708223","https://openalex.org/W2194775991","https://openalex.org/W2963150697","https://openalex.org/W4312881242","https://openalex.org/W4393158476","https://openalex.org/W4394597235","https://openalex.org/W4402660140","https://openalex.org/W4402915909","https://openalex.org/W4404098723","https://openalex.org/W4404612908","https://openalex.org/W4405786346","https://openalex.org/W4413147844"],"related_works":[],"abstract_inverted_index":{"Unified":[0],"vision-language":[1],"models":[2,37,91],"(VLMs)":[3],"promise":[4],"to":[5,87,116],"streamline":[6],"computer":[7],"vision":[8],"pipelines":[9],"by":[10],"reframing":[11],"multiple":[12],"visual":[13,122,138],"tasks\u2014such":[14],"as":[15],"classification,":[16],"detection,":[17,112],"and":[18,41,78,113,140],"keypoint":[19,80],"localization\u2014within":[20],"a":[21,58],"single":[22],"language-driven":[23,129],"interface.":[24],"This":[25],"architecture":[26],"is":[27],"particularly":[28],"appealing":[29],"in":[30,92,145],"industrial":[31,147],"inspection,":[32],"where":[33],"managing":[34],"disjoint":[35],"task-specific":[36],"introduces":[38],"complexity,":[39],"inefficiency,":[40],"maintenance":[42],"overhead.":[43],"In":[44],"this":[45,53],"paper,":[46],"we":[47,82],"critically":[48],"evaluate":[49],"the":[50,97,137],"viability":[51],"of":[52,121],"unified":[54],"paradigm":[55],"using":[56],"InspectVLM,":[57],"Florence-2\u2013based":[59],"VLM":[60],"trained":[61],"on":[62,75],"InspectMM,":[63],"our":[64],"new":[65],"large-scale":[66],"multimodal,":[67],"multitask":[68],"inspection":[69,94],"dataset.":[70],"While":[71],"InspectVLM":[72],"performs":[73],"competitively":[74],"image-level":[76],"classification":[77],"structured":[79],"tasks,":[81],"find":[83],"that":[84,127],"it":[85],"fails":[86],"match":[88],"traditional":[89],"ResNet-based":[90],"core":[93],"metrics.":[95],"Notably,":[96],"model":[98],"exhibits":[99],"brittle":[100],"behavior":[101],"under":[102],"low":[103],"prompt":[104],"variability,":[105],"produces":[106],"degenerate":[107],"outputs":[108],"for":[109,143],"fine-grained":[110],"object":[111],"frequently":[114],"defaults":[115],"memorized":[117],"language":[118],"responses":[119],"regardless":[120],"input.":[123],"Our":[124],"findings":[125],"suggest":[126],"while":[128],"unification":[130],"offers":[131],"conceptual":[132],"elegance,":[133],"current":[134],"VLMs":[135],"lack":[136],"grounding":[139],"robustness":[141],"necessary":[142],"deployment":[144],"precision-critical":[146],"inspections.":[148]},"counts_by_year":[],"updated_date":"2026-02-25T06:17:34.324206","created_date":"2026-02-24T00:00:00"}
