{"id":"https://openalex.org/W7154347023","doi":"https://doi.org/10.48550/arxiv.2604.10219","title":"Cognitive Pivot Points and Visual Anchoring: Unveiling and Rectifying Hallucinations in Multimodal Reasoning Models","display_name":"Cognitive Pivot Points and Visual Anchoring: Unveiling and Rectifying Hallucinations in Multimodal Reasoning Models","publication_year":2026,"publication_date":"2026-04-11","ids":{"openalex":"https://openalex.org/W7154347023","doi":"https://doi.org/10.48550/arxiv.2604.10219"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2604.10219","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.10219","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2604.10219","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5129752387","display_name":"Zhe Qian","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Qian, Zhe","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5020661081","display_name":"Yanbiao Ma","orcid":"https://orcid.org/0000-0002-8472-1475"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Ma, Yanbiao","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5133491219","display_name":"Zhuohan Ouyang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Ouyang, Zhuohan","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5133564629","display_name":"Zhonghua Wang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wang, Zhonghua","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5122369633","display_name":"Zhongxing Xu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Xu, Zhongxing","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5124152274","display_name":"Fei Luo","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Luo, Fei","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100446438","display_name":"Xinyu Liu","orcid":"https://orcid.org/0000-0002-6776-9063"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Liu, Xinyu","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5133579478","display_name":"Zongyuan Ge","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Ge, Zongyuan","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5133572583","display_name":"Yike Guo","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Guo, Yike","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5133561793","display_name":"Jungong Han","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Han, Jungong","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":10,"corresponding_author_ids":["https://openalex.org/A5129752387"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11605","display_name":"Visual Attention and Saliency Detection","score":0.19769999384880066,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11605","display_name":"Visual Attention and Saliency Detection","score":0.19769999384880066,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.1476999968290329,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12026","display_name":"Explainable Artificial Intelligence (XAI)","score":0.09149999916553497,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/visual-reasoning","display_name":"Visual reasoning","score":0.728600025177002},{"id":"https://openalex.org/keywords/debiasing","display_name":"Debiasing","score":0.6360999941825867},{"id":"https://openalex.org/keywords/cognition","display_name":"Cognition","score":0.5321000218391418},{"id":"https://openalex.org/keywords/causal-reasoning","display_name":"Causal reasoning","score":0.49140000343322754},{"id":"https://openalex.org/keywords/commonsense-reasoning","display_name":"Commonsense reasoning","score":0.43230000138282776},{"id":"https://openalex.org/keywords/visualization","display_name":"Visualization","score":0.3822000026702881},{"id":"https://openalex.org/keywords/entropy","display_name":"Entropy (arrow of time)","score":0.3790000081062317},{"id":"https://openalex.org/keywords/reflection","display_name":"Reflection (computer programming)","score":0.3750999867916107},{"id":"https://openalex.org/keywords/mechanism","display_name":"Mechanism (biology)","score":0.35740000009536743}],"concepts":[{"id":"https://openalex.org/C2777508537","wikidata":"https://www.wikidata.org/wiki/Q7936620","display_name":"Visual reasoning","level":2,"score":0.728600025177002},{"id":"https://openalex.org/C2779458634","wikidata":"https://www.wikidata.org/wiki/Q24963715","display_name":"Debiasing","level":2,"score":0.6360999941825867},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6069999933242798},{"id":"https://openalex.org/C169900460","wikidata":"https://www.wikidata.org/wiki/Q2200417","display_name":"Cognition","level":2,"score":0.5321000218391418},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.512499988079071},{"id":"https://openalex.org/C115086926","wikidata":"https://www.wikidata.org/wiki/Q17004651","display_name":"Causal reasoning","level":3,"score":0.49140000343322754},{"id":"https://openalex.org/C180747234","wikidata":"https://www.wikidata.org/wiki/Q23373","display_name":"Cognitive psychology","level":1,"score":0.4853000044822693},{"id":"https://openalex.org/C193221554","wikidata":"https://www.wikidata.org/wiki/Q5153664","display_name":"Commonsense reasoning","level":2,"score":0.43230000138282776},{"id":"https://openalex.org/C36464697","wikidata":"https://www.wikidata.org/wiki/Q451553","display_name":"Visualization","level":2,"score":0.3822000026702881},{"id":"https://openalex.org/C188147891","wikidata":"https://www.wikidata.org/wiki/Q147638","display_name":"Cognitive science","level":1,"score":0.37950000166893005},{"id":"https://openalex.org/C106301342","wikidata":"https://www.wikidata.org/wiki/Q4117933","display_name":"Entropy (arrow of time)","level":2,"score":0.3790000081062317},{"id":"https://openalex.org/C65682993","wikidata":"https://www.wikidata.org/wiki/Q1056451","display_name":"Reflection (computer programming)","level":2,"score":0.3750999867916107},{"id":"https://openalex.org/C89611455","wikidata":"https://www.wikidata.org/wiki/Q6804646","display_name":"Mechanism (biology)","level":2,"score":0.35740000009536743},{"id":"https://openalex.org/C183521366","wikidata":"https://www.wikidata.org/wiki/Q7256422","display_name":"Psychology of reasoning","level":4,"score":0.3564000129699707},{"id":"https://openalex.org/C103057564","wikidata":"https://www.wikidata.org/wiki/Q4751139","display_name":"Analytic reasoning","level":3,"score":0.34709998965263367},{"id":"https://openalex.org/C161407221","wikidata":"https://www.wikidata.org/wiki/Q4382939","display_name":"Cognitive model","level":3,"score":0.33889999985694885},{"id":"https://openalex.org/C98045186","wikidata":"https://www.wikidata.org/wiki/Q205663","display_name":"Process (computing)","level":2,"score":0.3343000113964081},{"id":"https://openalex.org/C15744967","wikidata":"https://www.wikidata.org/wiki/Q9418","display_name":"Psychology","level":0,"score":0.3278999924659729},{"id":"https://openalex.org/C2780103172","wikidata":"https://www.wikidata.org/wiki/Q1309721","display_name":"Visual Objects","level":3,"score":0.2937000095844269},{"id":"https://openalex.org/C160234255","wikidata":"https://www.wikidata.org/wiki/Q812535","display_name":"Bayesian inference","level":3,"score":0.2782000005245209},{"id":"https://openalex.org/C193611912","wikidata":"https://www.wikidata.org/wiki/Q4677596","display_name":"Active vision","level":2,"score":0.2777999937534332},{"id":"https://openalex.org/C97364631","wikidata":"https://www.wikidata.org/wiki/Q484284","display_name":"Deductive reasoning","level":2,"score":0.27570000290870667},{"id":"https://openalex.org/C166088908","wikidata":"https://www.wikidata.org/wiki/Q308495","display_name":"Abductive reasoning","level":2,"score":0.27559998631477356},{"id":"https://openalex.org/C158495155","wikidata":"https://www.wikidata.org/wiki/Q2369151","display_name":"Visual search","level":2,"score":0.27469998598098755},{"id":"https://openalex.org/C89288958","wikidata":"https://www.wikidata.org/wiki/Q7301504","display_name":"Reasoning system","level":2,"score":0.27070000767707825},{"id":"https://openalex.org/C178253425","wikidata":"https://www.wikidata.org/wiki/Q162668","display_name":"Visual perception","level":3,"score":0.2703999876976013},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.26759999990463257},{"id":"https://openalex.org/C50335755","wikidata":"https://www.wikidata.org/wiki/Q483247","display_name":"Phenomenon","level":2,"score":0.26339998841285706},{"id":"https://openalex.org/C2779525943","wikidata":"https://www.wikidata.org/wiki/Q1187300","display_name":"Grammaticality","level":3,"score":0.26010000705718994}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2604.10219","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.10219","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2604.10219","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.10219","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[{"score":0.6940929889678955,"display_name":"Peace, Justice and strong institutions","id":"https://metadata.un.org/sdg/16"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Multimodal":[0],"Large":[1],"Reasoning":[2,32],"Models":[3],"(MLRMs)":[4],"have":[5],"achieved":[6],"remarkable":[7],"strides":[8],"in":[9,58],"visual":[10,59,79,154,168,203],"reasoning":[11,20,127,163],"through":[12],"test":[13],"time":[14],"compute":[15],"scaling,":[16],"yet":[17],"long":[18],"chain":[19],"remains":[21],"prone":[22],"to":[23,55,77,83,96,123,130,166],"hallucinations.":[24],"We":[25,51],"identify":[26],"a":[27,56,89,117,178],"concerning":[28],"phenomenon":[29],"termed":[30],"the":[31,64,74,134,142,162,167,173,202],"Vision":[33],"Truth":[34],"Disconnect":[35],"(RVTD):":[36],"hallucinations":[37],"are":[38],"strongly":[39],"correlated":[40],"with":[41,99,114],"cognitive":[42,184,192],"bifurcation":[43,193],"points":[44,194],"that":[45,182],"often":[46],"exhibit":[47],"high":[48,71,147,190],"entropy":[49,148,191],"states.":[50],"attribute":[52],"this":[53,106,150],"vulnerability":[54],"breakdown":[57],"semantic":[60],"anchoring,":[61],"localized":[62],"within":[63,141],"network's":[65],"intermediate":[66,158],"layers;":[67],"specifically,":[68],"during":[69],"these":[70],"uncertainty":[72],"transitions,":[73],"model":[75],"fails":[76],"query":[78],"evidence,":[80],"reverting":[81],"instead":[82],"language":[84],"priors.":[85],"Consequently,":[86],"we":[87,108,171],"advocate":[88],"shift":[90],"from":[91],"solely":[92],"outcome":[93],"level":[94],"supervision":[95],"augmenting":[97],"it":[98],"fine":[100],"grained":[101],"internal":[102],"attention":[103,155],"guidance.":[104],"To":[105],"end,":[107],"propose":[109],"V-STAR":[110],"(Visual":[111],"Structural":[112],"Training":[113],"Attention":[115,137],"Reinforcement),":[116],"lightweight,":[118],"holistic":[119],"training":[120],"paradigm":[121],"designed":[122],"internalize":[124],"visually":[125],"aware":[126],"capabilities.":[128],"Central":[129],"our":[131],"approach":[132],"is":[133],"Hierarchical":[135],"Visual":[136],"Reward":[138],"(HVAR),":[139],"integrated":[140],"GRPO":[143],"framework.":[144],"Upon":[145],"detecting":[146],"states,":[149],"mechanism":[151],"dynamically":[152],"incentivizes":[153],"across":[156],"critical":[157],"layers,":[159],"thereby":[160,205],"anchoring":[161],"process":[164],"back":[165],"input.":[169],"Furthermore,":[170],"introduce":[172],"Forced":[174],"Reflection":[175],"Mechanism":[176],"(FRM),":[177],"trajectory":[179],"editing":[180],"strategy":[181],"disrupts":[183],"inertia":[185],"by":[186],"triggering":[187],"reflection":[188],"around":[189],"and":[195],"encouraging":[196],"verification":[197],"of":[198],"subsequent":[199],"steps":[200],"against":[201],"input,":[204],"translating":[206],"external":[207],"debiasing":[208],"interventions":[209],"into":[210],"an":[211],"intrinsic":[212],"capability":[213],"for":[214],"hallucination":[215],"mitigation.":[216]},"counts_by_year":[],"updated_date":"2026-05-05T08:41:31.759640","created_date":"2026-04-15T00:00:00"}
