{"id":"https://openalex.org/W7118997193","doi":"https://doi.org/10.1109/tmm.2026.3651099","title":"Mitigating Hallucinations in Large Vision-Language Models via Visual-Enhanced Contrastive Decoding","display_name":"Mitigating Hallucinations in Large Vision-Language Models via Visual-Enhanced Contrastive Decoding","publication_year":2026,"publication_date":"2026-01-01","ids":{"openalex":"https://openalex.org/W7118997193","doi":"https://doi.org/10.1109/tmm.2026.3651099"},"language":null,"primary_location":{"id":"doi:10.1109/tmm.2026.3651099","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tmm.2026.3651099","pdf_url":null,"source":{"id":"https://openalex.org/S137030581","display_name":"IEEE Transactions on Multimedia","issn_l":"1520-9210","issn":["1520-9210","1941-0077"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Multimedia","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5083471623","display_name":"Pengpeng Qiang","orcid":"https://orcid.org/0009-0000-3622-8769"},"institutions":[{"id":"https://openalex.org/I181877577","display_name":"Shanxi University","ror":"https://ror.org/03y3e3s17","country_code":"CN","type":"education","lineage":["https://openalex.org/I181877577"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Pengpeng Qiang","raw_affiliation_strings":["Key Laboratory of Computational Intelligence and Chinese Information Processing of Ministry of Education, School of Computer and Information Technology, Shanxi University, Taiyuan, China"],"raw_orcid":"https://orcid.org/0009-0000-3622-8769","affiliations":[{"raw_affiliation_string":"Key Laboratory of Computational Intelligence and Chinese Information Processing of Ministry of Education, School of Computer and Information Technology, Shanxi University, Taiyuan, China","institution_ids":["https://openalex.org/I181877577"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5122025730","display_name":"Hongye Tan","orcid":null},"institutions":[{"id":"https://openalex.org/I181877577","display_name":"Shanxi University","ror":"https://ror.org/03y3e3s17","country_code":"CN","type":"education","lineage":["https://openalex.org/I181877577"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Hongye Tan","raw_affiliation_strings":["Key Laboratory of Computational Intelligence and Chinese Information Processing of Ministry of Education, School of Computer and Information Technology, Shanxi University, Taiyuan, China"],"raw_orcid":"https://orcid.org/0000-0002-5858-899X","affiliations":[{"raw_affiliation_string":"Key Laboratory of Computational Intelligence and Chinese Information Processing of Ministry of Education, School of Computer and Information Technology, Shanxi University, Taiyuan, China","institution_ids":["https://openalex.org/I181877577"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5031874696","display_name":"Hu Zhang","orcid":"https://orcid.org/0000-0003-3327-6516"},"institutions":[{"id":"https://openalex.org/I181877577","display_name":"Shanxi University","ror":"https://ror.org/03y3e3s17","country_code":"CN","type":"education","lineage":["https://openalex.org/I181877577"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Hu Zhang","raw_affiliation_strings":["Key Laboratory of Computational Intelligence and Chinese Information Processing of Ministry of Education, School of Computer and Information Technology, Shanxi University, Taiyuan, China"],"raw_orcid":"https://orcid.org/0000-0003-0912-4870","affiliations":[{"raw_affiliation_string":"Key Laboratory of Computational Intelligence and Chinese Information Processing of Ministry of Education, School of Computer and Information Technology, Shanxi University, Taiyuan, China","institution_ids":["https://openalex.org/I181877577"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5122071976","display_name":"Xiaoli Li","orcid":null},"institutions":[{"id":"https://openalex.org/I152815399","display_name":"Singapore University of Technology and Design","ror":"https://ror.org/05j6fvn87","country_code":"SG","type":"education","lineage":["https://openalex.org/I152815399"]}],"countries":["SG"],"is_corresponding":false,"raw_author_name":"Xiaoli Li","raw_affiliation_strings":["Information Systems Technology and Design, Singapore University of Technology and Design, Singapore"],"raw_orcid":"https://orcid.org/0000-0002-0762-6562","affiliations":[{"raw_affiliation_string":"Information Systems Technology and Design, Singapore University of Technology and Design, Singapore","institution_ids":["https://openalex.org/I152815399"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5122199966","display_name":"Ru Li","orcid":null},"institutions":[{"id":"https://openalex.org/I181877577","display_name":"Shanxi University","ror":"https://ror.org/03y3e3s17","country_code":"CN","type":"education","lineage":["https://openalex.org/I181877577"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Ru Li","raw_affiliation_strings":["Key Laboratory of Computational Intelligence and Chinese Information Processing of Ministry of Education, School of Computer and Information Technology, Shanxi University, Taiyuan, China"],"raw_orcid":"https://orcid.org/0000-0003-1545-5553","affiliations":[{"raw_affiliation_string":"Key Laboratory of Computational Intelligence and Chinese Information Processing of Ministry of Education, School of Computer and Information Technology, Shanxi University, Taiyuan, China","institution_ids":["https://openalex.org/I181877577"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5121780950","display_name":"Jiye Liang","orcid":null},"institutions":[{"id":"https://openalex.org/I181877577","display_name":"Shanxi University","ror":"https://ror.org/03y3e3s17","country_code":"CN","type":"education","lineage":["https://openalex.org/I181877577"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jiye Liang","raw_affiliation_strings":["Key Laboratory of Computational Intelligence and Chinese Information Processing of Ministry of Education, School of Computer and Information Technology, Shanxi University, Taiyuan, China"],"raw_orcid":"https://orcid.org/0000-0001-5887-9327","affiliations":[{"raw_affiliation_string":"Key Laboratory of Computational Intelligence and Chinese Information Processing of Ministry of Education, School of Computer and Information Technology, Shanxi University, Taiyuan, China","institution_ids":["https://openalex.org/I181877577"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":6,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.02898897,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":"28","issue":null,"first_page":"3242","last_page":"3255"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10775","display_name":"Generative Adversarial Networks and Image Synthesis","score":0.32739999890327454,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10775","display_name":"Generative Adversarial Networks and Image Synthesis","score":0.32739999890327454,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11689","display_name":"Adversarial Robustness in Machine Learning","score":0.14270000159740448,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.0575999990105629,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/decoding-methods","display_name":"Decoding methods","score":0.7562999725341797},{"id":"https://openalex.org/keywords/security-token","display_name":"Security token","score":0.5645999908447266},{"id":"https://openalex.org/keywords/prior-probability","display_name":"Prior probability","score":0.5468000173568726},{"id":"https://openalex.org/keywords/process","display_name":"Process (computing)","score":0.43470001220703125},{"id":"https://openalex.org/keywords/visualization","display_name":"Visualization","score":0.4262999892234802},{"id":"https://openalex.org/keywords/language-model","display_name":"Language model","score":0.4165000021457672},{"id":"https://openalex.org/keywords/bottleneck","display_name":"Bottleneck","score":0.40400001406669617},{"id":"https://openalex.org/keywords/autoregressive-model","display_name":"Autoregressive model","score":0.34360000491142273},{"id":"https://openalex.org/keywords/key","display_name":"Key (lock)","score":0.326200008392334}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8342000246047974},{"id":"https://openalex.org/C57273362","wikidata":"https://www.wikidata.org/wiki/Q576722","display_name":"Decoding methods","level":2,"score":0.7562999725341797},{"id":"https://openalex.org/C48145219","wikidata":"https://www.wikidata.org/wiki/Q1335365","display_name":"Security token","level":2,"score":0.5645999908447266},{"id":"https://openalex.org/C177769412","wikidata":"https://www.wikidata.org/wiki/Q278090","display_name":"Prior probability","level":3,"score":0.5468000173568726},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5327000021934509},{"id":"https://openalex.org/C98045186","wikidata":"https://www.wikidata.org/wiki/Q205663","display_name":"Process (computing)","level":2,"score":0.43470001220703125},{"id":"https://openalex.org/C36464697","wikidata":"https://www.wikidata.org/wiki/Q451553","display_name":"Visualization","level":2,"score":0.4262999892234802},{"id":"https://openalex.org/C137293760","wikidata":"https://www.wikidata.org/wiki/Q3621696","display_name":"Language model","level":2,"score":0.4165000021457672},{"id":"https://openalex.org/C2780513914","wikidata":"https://www.wikidata.org/wiki/Q18210350","display_name":"Bottleneck","level":2,"score":0.40400001406669617},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.3785000145435333},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.34549999237060547},{"id":"https://openalex.org/C159877910","wikidata":"https://www.wikidata.org/wiki/Q2202883","display_name":"Autoregressive model","level":2,"score":0.34360000491142273},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.326200008392334},{"id":"https://openalex.org/C160086991","wikidata":"https://www.wikidata.org/wiki/Q5939193","display_name":"Human visual system model","level":3,"score":0.3206000030040741},{"id":"https://openalex.org/C178253425","wikidata":"https://www.wikidata.org/wiki/Q162668","display_name":"Visual perception","level":3,"score":0.31940001249313354},{"id":"https://openalex.org/C81917197","wikidata":"https://www.wikidata.org/wiki/Q628760","display_name":"Selection (genetic algorithm)","level":2,"score":0.31630000472068787},{"id":"https://openalex.org/C149441793","wikidata":"https://www.wikidata.org/wiki/Q200726","display_name":"Probability distribution","level":2,"score":0.29019999504089355},{"id":"https://openalex.org/C2779530757","wikidata":"https://www.wikidata.org/wiki/Q1207505","display_name":"Quality (philosophy)","level":2,"score":0.289000004529953},{"id":"https://openalex.org/C118505674","wikidata":"https://www.wikidata.org/wiki/Q42586063","display_name":"Encoder","level":2,"score":0.2782000005245209},{"id":"https://openalex.org/C158495155","wikidata":"https://www.wikidata.org/wiki/Q2369151","display_name":"Visual search","level":2,"score":0.2750000059604645},{"id":"https://openalex.org/C188198153","wikidata":"https://www.wikidata.org/wiki/Q1613840","display_name":"Limiting","level":2,"score":0.27480000257492065},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.2736000120639801},{"id":"https://openalex.org/C2908998935","wikidata":"https://www.wikidata.org/wiki/Q130741","display_name":"Visual Hallucination","level":2,"score":0.27309998869895935},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.2703000009059906},{"id":"https://openalex.org/C160633673","wikidata":"https://www.wikidata.org/wiki/Q355198","display_name":"Pixel","level":2,"score":0.2621999979019165},{"id":"https://openalex.org/C141379421","wikidata":"https://www.wikidata.org/wiki/Q6094427","display_name":"Iterative reconstruction","level":2,"score":0.2606000006198883},{"id":"https://openalex.org/C2776760102","wikidata":"https://www.wikidata.org/wiki/Q5139990","display_name":"Code (set theory)","level":3,"score":0.2542000114917755},{"id":"https://openalex.org/C2777508537","wikidata":"https://www.wikidata.org/wiki/Q7936620","display_name":"Visual reasoning","level":2,"score":0.2542000114917755}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/tmm.2026.3651099","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tmm.2026.3651099","pdf_url":null,"source":{"id":"https://openalex.org/S137030581","display_name":"IEEE Transactions on Multimedia","issn_l":"1520-9210","issn":["1520-9210","1941-0077"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Multimedia","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/4","display_name":"Quality Education","score":0.5693666338920593}],"awards":[{"id":"https://openalex.org/G4401028597","display_name":null,"funder_award_id":"62576200","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G8030840172","display_name":null,"funder_award_id":"62476161","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Despite":[0],"significant":[1],"advancements":[2],"in":[3,13,125],"large":[4],"visual-language":[5],"models":[6],"(LVLMs),":[7],"hallucinations":[8,22,103,195],"remain":[9],"a":[10,39,84,151,168,207],"major":[11],"bottleneck":[12],"their":[14],"practical":[15,208],"applications.":[16],"One":[17],"key":[18],"factor":[19],"contributing":[20],"to":[21,74],"is":[23],"the":[24,30,46,51,55,61,68,71,99,131,156,162,175,184,198],"over-reliance":[25],"on":[26,78,108],"language":[27,109,139],"priors":[28],"during":[29],"autoregressive":[31],"text":[32,76,178],"generation":[33,132,176],"process.":[34],"Visual":[35],"Contrastive":[36,88],"Decoding":[37,89],"(VCD),":[38],"popular":[40],"technique":[41],"for":[42,116,155],"mitigating":[43],"hallucinations,":[44],"perturbs":[45],"visual":[47,65,80,96,117,123,137,147],"input":[48],"and":[49,138,145,196],"compares":[50],"perturbed":[52],"output":[53],"with":[54,144,183],"original.":[56],"However,":[57],"it":[58],"often":[59],"overlooks":[60],"gradual":[62],"attenuation":[63],"of":[64,177,200],"information":[66,97],"within":[67,98],"decoder,":[69,100],"limiting":[70],"model's":[72],"ability":[73],"generate":[75],"based":[77],"actual":[79],"content.":[81,186],"We":[82],"propose":[83],"novel,":[85],"training-free":[86],"method\u2014Visual-Enhanced":[87],"(VECD)\u2014which":[90],"addresses":[91],"this":[92],"issue":[93],"by":[94,105,134,166],"amplifying":[95],"thereby":[101],"reducing":[102],"caused":[104],"excessive":[106],"reliance":[107],"priors.":[110,140],"VECD":[111,192],"dynamically":[112],"selects":[113],"later":[114],"layers":[115],"injection,":[118],"while":[119],"retaining":[120],"only":[121],"essential":[122],"tokens":[124],"early":[126],"layers.":[127],"This":[128],"approach":[129],"enhances":[130],"process":[133],"adaptively":[135],"balancing":[136],"By":[141],"comparing":[142],"outputs":[143],"without":[146],"amplification,":[148],"we":[149,160],"derive":[150],"refined":[152],"probability":[153],"distribution":[154],"next":[157],"token.":[158],"Moreover,":[159],"improve":[161],"beam":[163],"search":[164],"algorithm":[165],"introducing":[167],"visually":[169],"guided":[170],"token":[171],"selection":[172],"strategy,":[173],"enabling":[174],"that":[179,191],"aligns":[180],"more":[181],"closely":[182],"image":[185],"Our":[187],"extensive":[188],"experiments":[189],"show":[190],"significantly":[193],"reduces":[194],"improves":[197],"quality":[199],"generated":[201],"text,":[202],"demonstrating":[203],"its":[204],"effectiveness":[205],"as":[206],"solution.":[209]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2026-01-08T00:00:00"}
