{"id":"https://openalex.org/W4393116558","doi":"https://doi.org/10.48550/arxiv.2403.14003","title":"Multi-Modal Hallucination Control by Visual Information Grounding","display_name":"Multi-Modal Hallucination Control by Visual Information Grounding","publication_year":2024,"publication_date":"2024-03-20","ids":{"openalex":"https://openalex.org/W4393116558","doi":"https://doi.org/10.48550/arxiv.2403.14003"},"language":"en","primary_location":{"id":"pmh:oai:arXiv.org:2403.14003","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2403.14003","pdf_url":"https://arxiv.org/pdf/2403.14003","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"text"},"type":"preprint","indexed_in":["arxiv","datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/2403.14003","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5113591724","display_name":"Alessandro Favero","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Favero, Alessandro","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5036782268","display_name":"Luca Zancato","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zancato, Luca","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5030073760","display_name":"Matthew Trager","orcid":"https://orcid.org/0000-0001-9304-6427"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Trager, Matthew","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5103250144","display_name":"Siddharth Choudhary","orcid":"https://orcid.org/0000-0001-5329-1858"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Choudhary, Siddharth","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5034085630","display_name":"Pramuditha Perera","orcid":"https://orcid.org/0000-0003-2821-6367"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Perera, Pramuditha","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5065386783","display_name":"Alessandro Achille","orcid":"https://orcid.org/0000-0002-8163-8326"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Achille, Alessandro","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5084355679","display_name":"Ashwin Swaminathan","orcid":"https://orcid.org/0000-0002-4279-369X"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Swaminathan, Ashwin","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5038328783","display_name":"Stefano Soatto","orcid":"https://orcid.org/0000-0003-2902-6362"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Soatto, Stefano","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":8,"corresponding_author_ids":["https://openalex.org/A5113591724"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":true,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10241","display_name":"Functional Brain Connectivity Studies","score":0.882099986076355,"subfield":{"id":"https://openalex.org/subfields/2805","display_name":"Cognitive Neuroscience"},"field":{"id":"https://openalex.org/fields/28","display_name":"Neuroscience"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},"topics":[{"id":"https://openalex.org/T10241","display_name":"Functional Brain Connectivity Studies","score":0.882099986076355,"subfield":{"id":"https://openalex.org/subfields/2805","display_name":"Cognitive Neuroscience"},"field":{"id":"https://openalex.org/fields/28","display_name":"Neuroscience"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},{"id":"https://openalex.org/T13283","display_name":"Mental Health Research Topics","score":0.8360999822616577,"subfield":{"id":"https://openalex.org/subfields/3205","display_name":"Experimental and Cognitive Psychology"},"field":{"id":"https://openalex.org/fields/32","display_name":"Psychology"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T13397","display_name":"Hallucinations in medical conditions","score":0.7718999981880188,"subfield":{"id":"https://openalex.org/subfields/2805","display_name":"Cognitive Neuroscience"},"field":{"id":"https://openalex.org/fields/28","display_name":"Neuroscience"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/modal","display_name":"Modal","score":0.772705078125},{"id":"https://openalex.org/keywords/visual-hallucination","display_name":"Visual Hallucination","score":0.6114712357521057},{"id":"https://openalex.org/keywords/control","display_name":"Control (management)","score":0.5216065645217896},{"id":"https://openalex.org/keywords/ground","display_name":"Ground","score":0.47452083230018616},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.4134616553783417},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.35284459590911865},{"id":"https://openalex.org/keywords/psychology","display_name":"Psychology","score":0.3489258885383606},{"id":"https://openalex.org/keywords/engineering","display_name":"Engineering","score":0.18343818187713623}],"concepts":[{"id":"https://openalex.org/C71139939","wikidata":"https://www.wikidata.org/wiki/Q910194","display_name":"Modal","level":2,"score":0.772705078125},{"id":"https://openalex.org/C2908998935","wikidata":"https://www.wikidata.org/wiki/Q130741","display_name":"Visual Hallucination","level":2,"score":0.6114712357521057},{"id":"https://openalex.org/C2775924081","wikidata":"https://www.wikidata.org/wiki/Q55608371","display_name":"Control (management)","level":2,"score":0.5216065645217896},{"id":"https://openalex.org/C168993435","wikidata":"https://www.wikidata.org/wiki/Q6501125","display_name":"Ground","level":2,"score":0.47452083230018616},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.4134616553783417},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.35284459590911865},{"id":"https://openalex.org/C15744967","wikidata":"https://www.wikidata.org/wiki/Q9418","display_name":"Psychology","level":0,"score":0.3489258885383606},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.18343818187713623},{"id":"https://openalex.org/C118552586","wikidata":"https://www.wikidata.org/wiki/Q7867","display_name":"Psychiatry","level":1,"score":0.0},{"id":"https://openalex.org/C188027245","wikidata":"https://www.wikidata.org/wiki/Q750446","display_name":"Polymer chemistry","level":1,"score":0.0},{"id":"https://openalex.org/C185592680","wikidata":"https://www.wikidata.org/wiki/Q2329","display_name":"Chemistry","level":0,"score":0.0},{"id":"https://openalex.org/C119599485","wikidata":"https://www.wikidata.org/wiki/Q43035","display_name":"Electrical engineering","level":1,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"pmh:oai:arXiv.org:2403.14003","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2403.14003","pdf_url":"https://arxiv.org/pdf/2403.14003","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"text"},{"id":"doi:10.48550/arxiv.2403.14003","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2403.14003","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article-journal"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:2403.14003","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2403.14003","pdf_url":"https://arxiv.org/pdf/2403.14003","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"text"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":true,"grobid_xml":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4393116558.pdf","grobid_xml":"https://content.openalex.org/works/W4393116558.grobid-xml"},"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W2419430421","https://openalex.org/W2334811251","https://openalex.org/W2386464051","https://openalex.org/W32883749","https://openalex.org/W1972094787","https://openalex.org/W2308791691","https://openalex.org/W1971933602","https://openalex.org/W39816624","https://openalex.org/W2002427771","https://openalex.org/W2949208013"],"abstract_inverted_index":{"Generative":[0],"Vision-Language":[1],"Models":[2],"(VLMs)":[3],"are":[4,13,51],"prone":[5],"to":[6,27,116,150],"generate":[7],"plausible-sounding":[8],"textual":[9],"answers":[10],"that,":[11],"however,":[12],"not":[14],"always":[15],"grounded":[16],"in":[17,202],"the":[18,40,53,56,66,88,91,95,100,109,152,156,171,189,197,212],"input":[19],"image.":[20],"We":[21],"investigate":[22],"this":[23,61],"phenomenon,":[24],"usually":[25],"referred":[26],"as":[28,48,218],"\"hallucination\"":[29],"and":[30,60,128,173,194,207,210,222],"show":[31,46,139,166],"that":[32,47,140,167],"it":[33],"stems":[34],"from":[35],"an":[36,136],"excessive":[37],"reliance":[38,54,154],"on":[39,55,155,214],"language":[41,96],"prior.":[42],"In":[43],"particular,":[44],"we":[45,73,138],"more":[49],"tokens":[50,103],"generated,":[52],"visual":[57,110],"prompt":[58,84,157],"decreases,":[59],"behavior":[62],"strongly":[63],"correlates":[64],"with":[65,104,108,129,145],"emergence":[67],"of":[68,90,102,176,199],"hallucinations.":[69],"To":[70],"reduce":[71,196],"hallucinations,":[72],"introduce":[74],"Multi-Modal":[75],"Mutual-Information":[76],"Decoding":[77],"(M3ID),":[78],"a":[79],"new":[80],"sampling":[81],"method":[82],"for":[83,188],"amplification.":[85],"M3ID":[86,112,141,193],"amplifies":[87],"influence":[89],"reference":[92],"image":[93,158],"over":[94],"prior,":[97],"hence":[98],"favoring":[99],"generation":[101],"higher":[105],"mutual":[106],"information":[107],"prompt.":[111],"can":[113,142],"be":[114,143],"applied":[115],"any":[117,161],"pre-trained":[118,177],"autoregressive":[119],"VLM":[120],"at":[121],"inference":[122],"time":[123],"without":[124,159],"necessitating":[125],"further":[126],"training":[127,134],"minimal":[130],"computational":[131],"overhead.":[132],"If":[133],"is":[135],"option,":[137],"paired":[144],"Direct":[146],"Preference":[147],"Optimization":[148],"(DPO)":[149],"improve":[151,211],"model's":[153],"requiring":[160],"labels.":[162],"Our":[163],"empirical":[164],"findings":[165],"our":[168],"algorithms":[169],"maintain":[170],"fluency":[172],"linguistic":[174],"capabilities":[175],"VLMs":[178],"while":[179],"reducing":[180],"hallucinations":[181],"by":[182,205,220],"mitigating":[183],"visually":[184],"ungrounded":[185],"answers.":[186],"Specifically,":[187],"LLaVA":[190],"13B":[191],"model,":[192],"M3ID+DPO":[195],"percentage":[198],"hallucinated":[200],"objects":[201],"captioning":[203],"tasks":[204],"25%":[206],"28%,":[208],"respectively,":[209],"accuracy":[213],"VQA":[215],"benchmarks":[216],"such":[217],"POPE":[219],"21%":[221],"24%.":[223]},"counts_by_year":[],"updated_date":"2026-03-10T16:38:18.471706","created_date":"2025-10-10T00:00:00"}
