{"id":"https://openalex.org/W7128383720","doi":"https://doi.org/10.48550/arxiv.2602.06652","title":"Same Answer, Different Representations: Hidden instability in VLMs","display_name":"Same Answer, Different Representations: Hidden instability in VLMs","publication_year":2026,"publication_date":"2026-02-06","ids":{"openalex":"https://openalex.org/W7128383720","doi":"https://doi.org/10.48550/arxiv.2602.06652"},"language":null,"primary_location":{"id":"pmh:doi:10.48550/arxiv.2602.06652","is_oa":true,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S4406922384","display_name":"Open MIND","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":null,"any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5087488302","display_name":"Farooq Ahmad Wani","orcid":"https://orcid.org/0009-0001-3767-8515"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wani, Farooq Ahmad","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5010504829","display_name":"Alessandro Suglia","orcid":"https://orcid.org/0000-0002-3177-5197"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Suglia, Alessandro","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5125390957","display_name":"Rohit Saxena","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Saxena, Rohit","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5080213009","display_name":"Aryo Pradipta Gema","orcid":"https://orcid.org/0009-0007-1163-3531"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Gema, Aryo Pradipta","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5068169387","display_name":"Wai-Chung Kwan","orcid":"https://orcid.org/0000-0002-2942-4208"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Kwan, Wai-Chung","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5125375788","display_name":"Fazl Barez","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Barez, Fazl","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5083528787","display_name":"Maria Sofia Bucarelli","orcid":"https://orcid.org/0009-0007-5101-8242"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Bucarelli, Maria Sofia","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5125384002","display_name":"Fabrizio Silvestri","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Silvestri, Fabrizio","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5125411253","display_name":"Pasquale Minervini","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Minervini, Pasquale","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":9,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.8906000256538391,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.8906000256538391,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11148","display_name":"Language, Metaphor, and Cognition","score":0.008799999952316284,"subfield":{"id":"https://openalex.org/subfields/3205","display_name":"Experimental and Cognitive Psychology"},"field":{"id":"https://openalex.org/fields/32","display_name":"Psychology"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T12694","display_name":"Categorization, perception, and language","score":0.006500000134110451,"subfield":{"id":"https://openalex.org/subfields/3205","display_name":"Experimental and Cognitive Psychology"},"field":{"id":"https://openalex.org/fields/32","display_name":"Psychology"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/robustness","display_name":"Robustness (evolution)","score":0.6895999908447266},{"id":"https://openalex.org/keywords/embedding","display_name":"Embedding","score":0.5717999935150146},{"id":"https://openalex.org/keywords/smoothness","display_name":"Smoothness","score":0.45680001378059387},{"id":"https://openalex.org/keywords/false-positive-paradox","display_name":"False positive paradox","score":0.45249998569488525},{"id":"https://openalex.org/keywords/stability","display_name":"Stability (learning theory)","score":0.33340001106262207},{"id":"https://openalex.org/keywords/representation","display_name":"Representation (politics)","score":0.29660001397132874},{"id":"https://openalex.org/keywords/internal-model","display_name":"Internal model","score":0.2883000075817108}],"concepts":[{"id":"https://openalex.org/C63479239","wikidata":"https://www.wikidata.org/wiki/Q7353546","display_name":"Robustness (evolution)","level":3,"score":0.6895999908447266},{"id":"https://openalex.org/C41608201","wikidata":"https://www.wikidata.org/wiki/Q980509","display_name":"Embedding","level":2,"score":0.5717999935150146},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.5038999915122986},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.45840001106262207},{"id":"https://openalex.org/C102634674","wikidata":"https://www.wikidata.org/wiki/Q868473","display_name":"Smoothness","level":2,"score":0.45680001378059387},{"id":"https://openalex.org/C64869954","wikidata":"https://www.wikidata.org/wiki/Q1859747","display_name":"False positive paradox","level":2,"score":0.45249998569488525},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.3815999925136566},{"id":"https://openalex.org/C112972136","wikidata":"https://www.wikidata.org/wiki/Q7595718","display_name":"Stability (learning theory)","level":2,"score":0.33340001106262207},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.3321000039577484},{"id":"https://openalex.org/C2776359362","wikidata":"https://www.wikidata.org/wiki/Q2145286","display_name":"Representation (politics)","level":3,"score":0.29660001397132874},{"id":"https://openalex.org/C28427503","wikidata":"https://www.wikidata.org/wiki/Q13580300","display_name":"Internal model","level":3,"score":0.2883000075817108},{"id":"https://openalex.org/C2776436953","wikidata":"https://www.wikidata.org/wiki/Q5163215","display_name":"Consistency (knowledge bases)","level":2,"score":0.2849000096321106},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.28119999170303345},{"id":"https://openalex.org/C3018868096","wikidata":"https://www.wikidata.org/wiki/Q2693233","display_name":"Internal consistency","level":3,"score":0.27880001068115234},{"id":"https://openalex.org/C207821765","wikidata":"https://www.wikidata.org/wiki/Q405372","display_name":"Instability","level":2,"score":0.2759000062942505},{"id":"https://openalex.org/C63584917","wikidata":"https://www.wikidata.org/wiki/Q333286","display_name":"Bounding overwatch","level":2,"score":0.26739999651908875},{"id":"https://openalex.org/C97256817","wikidata":"https://www.wikidata.org/wiki/Q1462316","display_name":"Spurious relationship","level":2,"score":0.259799987077713},{"id":"https://openalex.org/C45493050","wikidata":"https://www.wikidata.org/wiki/Q7884934","display_name":"Unified Model","level":2,"score":0.25699999928474426},{"id":"https://openalex.org/C149782125","wikidata":"https://www.wikidata.org/wiki/Q160039","display_name":"Econometrics","level":1,"score":0.25600001215934753}],"mesh":[],"locations_count":2,"locations":[{"id":"pmh:doi:10.48550/arxiv.2602.06652","is_oa":true,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S4406922384","display_name":"Open MIND","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Article"},{"id":"doi:10.48550/arxiv.2602.06652","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2602.06652","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"pmh:doi:10.48550/arxiv.2602.06652","is_oa":true,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S4406922384","display_name":"Open MIND","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Article"},"sustainable_development_goals":[{"score":0.7951974868774414,"display_name":"Peace, Justice and strong institutions","id":"https://metadata.un.org/sdg/16"}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"The":[0],"robustness":[1,118],"of":[2,52,100],"Vision":[3],"Language":[4],"Models":[5],"(VLMs)":[6],"is":[7,30],"commonly":[8],"assessed":[9],"through":[10],"output-level":[11],"invariance,":[12],"implicitly":[13],"assuming":[14],"that":[15,27,40,104,146],"stable":[16,19],"predictions":[17],"reflect":[18],"multimodal":[20],"processing.":[21],"In":[22],"this":[23,28,60,95],"work,":[24],"we":[25,144],"argue":[26],"assumption":[29],"insufficient.":[31],"We":[32],"introduce":[33],"a":[34],"representation-aware":[35],"and":[36,47,69,161],"frequency-aware":[37],"evaluation":[38],"framework":[39,61],"measures":[41],"internal":[42,86],"embedding":[43],"drift,":[44],"spectral":[45],"sensitivity,":[46,134],"structural":[48],"smoothness":[49],"(spatial":[50],"consistency":[51],"vision":[53],"tokens),":[54],"alongside":[55],"standard":[56],"label-based":[57],"metrics.":[58],"Applying":[59],"to":[62,107],"modern":[63],"VLMs":[64],"across":[65],"the":[66,98,167],"SEEDBench,":[67],"MMMU,":[68],"POPE":[70],"datasets":[71],"reveals":[72],"three":[73],"distinct":[74],"failure":[75],"modes.":[76],"First,":[77],"models":[78,125,158,177],"frequently":[79],"preserve":[80],"predicted":[81],"answers":[82],"while":[83],"undergoing":[84],"substantial":[85],"representation":[87],"drift;":[88],"for":[89],"perturbations":[90,147],"such":[91],"as":[92],"text":[93],"overlays,":[94],"drift":[96],"approaches":[97],"magnitude":[99],"inter-image":[101],"variability,":[102],"indicating":[103],"representations":[105],"move":[106],"regions":[108],"typically":[109],"occupied":[110],"by":[111,175],"unrelated":[112],"inputs":[113],"despite":[114],"unchanged":[115],"outputs.":[116],"Second,":[117],"does":[119],"not":[120],"improve":[121],"with":[122,136],"scale;":[123],"larger":[124],"achieve":[126],"higher":[127],"accuracy":[128],"but":[129,165],"exhibit":[130],"equal":[131],"or":[132],"greater":[133],"consistent":[135],"sharper":[137],"yet":[138],"more":[139,179],"fragile":[140],"decision":[141],"boundaries.":[142],"Third,":[143],"find":[145],"affect":[148],"tasks":[149],"differently:":[150],"they":[151,155,170],"harm":[152],"reasoning":[153],"when":[154],"disrupt":[156],"how":[157],"combine":[159],"coarse":[160],"fine":[162],"visual":[163],"cues,":[164],"on":[166],"hallucination":[168],"benchmarks,":[169],"can":[171],"reduce":[172],"false":[173],"positives":[174],"making":[176],"generate":[178],"conservative":[180],"answers.":[181]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2026-02-10T00:00:00"}
