{"id":"https://openalex.org/W7137986166","doi":"https://doi.org/10.1609/aaai.v40i10.37768","title":"Taming the Phantom: Token-Asymmetric Filtering for Hallucination Mitigation in Large Vision-Language Models","display_name":"Taming the Phantom: Token-Asymmetric Filtering for Hallucination Mitigation in Large Vision-Language Models","publication_year":2026,"publication_date":"2026-03-14","ids":{"openalex":"https://openalex.org/W7137986166","doi":"https://doi.org/10.1609/aaai.v40i10.37768"},"language":null,"primary_location":{"id":"doi:10.1609/aaai.v40i10.37768","is_oa":true,"landing_page_url":"https://doi.org/10.1609/aaai.v40i10.37768","pdf_url":null,"source":{"id":"https://openalex.org/S4210191458","display_name":"Proceedings of the AAAI Conference on Artificial Intelligence","issn_l":"2159-5399","issn":["2159-5399","2374-3468"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/P4310320058","host_organization_name":"Association for the Advancement of Artificial Intelligence","host_organization_lineage":["https://openalex.org/P4310320058"],"host_organization_lineage_names":["Association for the Advancement of Artificial Intelligence"],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the AAAI Conference on Artificial Intelligence","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"diamond","oa_url":"https://doi.org/10.1609/aaai.v40i10.37768","any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5045662308","display_name":"Shuyi Ouyang","orcid":"https://orcid.org/0000-0003-4507-4153"},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Shuyi Ouyang","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5129669559","display_name":"Hongyi Wang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Hongyi Wang","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5129745268","display_name":"Gongfan Fang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Gongfan Fang","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5129716588","display_name":"Xinyin Ma","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Xinyin Ma","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5129725972","display_name":"Lanfen Lin","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Lanfen Lin","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5129709231","display_name":"Xinchao Wang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Xinchao Wang","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5045662308"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.19582665,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":"40","issue":"10","first_page":"8206","last_page":"8214"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11689","display_name":"Adversarial Robustness in Machine Learning","score":0.2944999933242798,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11689","display_name":"Adversarial Robustness in Machine Learning","score":0.2944999933242798,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.09650000184774399,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10775","display_name":"Generative Adversarial Networks and Image Synthesis","score":0.032499998807907104,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/modality","display_name":"Modality (human\u2013computer interaction)","score":0.7044000029563904},{"id":"https://openalex.org/keywords/focus","display_name":"Focus (optics)","score":0.6169999837875366},{"id":"https://openalex.org/keywords/key","display_name":"Key (lock)","score":0.6104999780654907},{"id":"https://openalex.org/keywords/reliability","display_name":"Reliability (semiconductor)","score":0.5519000291824341},{"id":"https://openalex.org/keywords/visual-hallucination","display_name":"Visual Hallucination","score":0.4016999900341034},{"id":"https://openalex.org/keywords/visualization","display_name":"Visualization","score":0.37549999356269836},{"id":"https://openalex.org/keywords/sensory-cue","display_name":"Sensory cue","score":0.3630000054836273}],"concepts":[{"id":"https://openalex.org/C2780226545","wikidata":"https://www.wikidata.org/wiki/Q6888030","display_name":"Modality (human\u2013computer interaction)","level":2,"score":0.7044000029563904},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6541000008583069},{"id":"https://openalex.org/C192209626","wikidata":"https://www.wikidata.org/wiki/Q190909","display_name":"Focus (optics)","level":2,"score":0.6169999837875366},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.6104999780654907},{"id":"https://openalex.org/C43214815","wikidata":"https://www.wikidata.org/wiki/Q7310987","display_name":"Reliability (semiconductor)","level":3,"score":0.5519000291824341},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5184000134468079},{"id":"https://openalex.org/C180747234","wikidata":"https://www.wikidata.org/wiki/Q23373","display_name":"Cognitive psychology","level":1,"score":0.42089998722076416},{"id":"https://openalex.org/C2908998935","wikidata":"https://www.wikidata.org/wiki/Q130741","display_name":"Visual Hallucination","level":2,"score":0.4016999900341034},{"id":"https://openalex.org/C36464697","wikidata":"https://www.wikidata.org/wiki/Q451553","display_name":"Visualization","level":2,"score":0.37549999356269836},{"id":"https://openalex.org/C111370547","wikidata":"https://www.wikidata.org/wiki/Q7451120","display_name":"Sensory cue","level":2,"score":0.3630000054836273},{"id":"https://openalex.org/C15744967","wikidata":"https://www.wikidata.org/wiki/Q9418","display_name":"Psychology","level":0,"score":0.36149999499320984},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.30889999866485596},{"id":"https://openalex.org/C2776502983","wikidata":"https://www.wikidata.org/wiki/Q690182","display_name":"Contrast (vision)","level":2,"score":0.30300000309944153},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.29440000653266907},{"id":"https://openalex.org/C178253425","wikidata":"https://www.wikidata.org/wiki/Q162668","display_name":"Visual perception","level":3,"score":0.29420000314712524},{"id":"https://openalex.org/C46312422","wikidata":"https://www.wikidata.org/wiki/Q11024","display_name":"Communication","level":1,"score":0.28870001435279846},{"id":"https://openalex.org/C2776214188","wikidata":"https://www.wikidata.org/wiki/Q408386","display_name":"Inference","level":2,"score":0.2863999903202057},{"id":"https://openalex.org/C2779010991","wikidata":"https://www.wikidata.org/wiki/Q2720909","display_name":"Artifact (error)","level":2,"score":0.2847000062465668},{"id":"https://openalex.org/C168167062","wikidata":"https://www.wikidata.org/wiki/Q1117970","display_name":"Component (thermodynamics)","level":2,"score":0.2581999897956848},{"id":"https://openalex.org/C204323151","wikidata":"https://www.wikidata.org/wiki/Q905424","display_name":"Range (aeronautics)","level":2,"score":0.2513999938964844},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.25130000710487366}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1609/aaai.v40i10.37768","is_oa":true,"landing_page_url":"https://doi.org/10.1609/aaai.v40i10.37768","pdf_url":null,"source":{"id":"https://openalex.org/S4210191458","display_name":"Proceedings of the AAAI Conference on Artificial Intelligence","issn_l":"2159-5399","issn":["2159-5399","2374-3468"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/P4310320058","host_organization_name":"Association for the Advancement of Artificial Intelligence","host_organization_lineage":["https://openalex.org/P4310320058"],"host_organization_lineage_names":["Association for the Advancement of Artificial Intelligence"],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the AAAI Conference on Artificial Intelligence","raw_type":"journal-article"}],"best_oa_location":{"id":"doi:10.1609/aaai.v40i10.37768","is_oa":true,"landing_page_url":"https://doi.org/10.1609/aaai.v40i10.37768","pdf_url":null,"source":{"id":"https://openalex.org/S4210191458","display_name":"Proceedings of the AAAI Conference on Artificial Intelligence","issn_l":"2159-5399","issn":["2159-5399","2374-3468"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/P4310320058","host_organization_name":"Association for the Advancement of Artificial Intelligence","host_organization_lineage":["https://openalex.org/P4310320058"],"host_organization_lineage_names":["Association for the Advancement of Artificial Intelligence"],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the AAAI Conference on Artificial Intelligence","raw_type":"journal-article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Hallucination":[0],"in":[1,13,62,67,108,149,165],"Large":[2],"Vision-Language":[3],"Models":[4],"(LVLMs)":[5],"remains":[6],"a":[7,47,56,181],"critical":[8,127],"challenge,":[9],"undermining":[10],"their":[11],"reliability":[12],"real-world":[14],"applications.":[15],"Existing":[16],"studies":[17],"have":[18],"investigated":[19],"the":[20,25,36,68,74,114,124,153,160,166],"causes":[21],"of":[22,59,73,117,126,155,162,183],"hallucination":[23],"at":[24],"modality":[26,37,76],"level":[27,38],"and":[28,50,77,123,158],"proposed":[29],"effective":[30],"strategies.":[31],"However,":[32],"interaction":[33],"patterns":[34],"beyond":[35],"remain":[39],"insufficiently":[40],"explored.":[41],"In":[42],"this":[43],"paper,":[44],"we":[45,105,136],"conduct":[46],"token-level":[48,112],"analysis":[49],"identify":[51,87],"two":[52,111],"key":[53,88],"phenomena:":[54],"(1)":[55],"small":[57],"subset":[58],"textual":[60,119],"tokens":[61,120,157,164],"LVLMs":[63,84,109],"exert":[64],"disproportionate":[65,115],"influence":[66,116,154,161],"visual-active":[69,167],"layers,":[70],"surpassing":[71],"that":[72,144,175],"visual":[75,80,89,128],"potentially":[78],"misleading":[79],"understanding;":[81],"(2)":[82],"while":[83],"can":[85,96],"correctly":[86],"information,":[90],"insufficient":[91],"focus":[92],"on":[93,102],"these":[94,134],"cues":[95,129],"sometimes":[97],"lead":[98],"to":[99,110],"hallucinations.":[100],"Based":[101],"such":[103],"observation,":[104],"attribute":[106],"hallucinations":[107,179],"causes:":[113],"certain":[118],"(phantom":[121],"tokens)":[122],"underutilization":[125],"(anchor":[130],"tokens).":[131],"To":[132],"mitigate":[133],"issues,":[135],"introduce":[137],"Token-Asymmetric":[138],"Filtering":[139],"(TAF)\u2014a":[140],"training-free,":[141],"plug-and-play":[142],"method":[143],"modulates":[145],"intermediate":[146],"attention":[147],"maps":[148],"LVLMs.":[150,185],"TAF":[151,176],"isolates":[152],"phantom":[156],"emphasizes":[159],"anchor":[163],"layers.":[168],"Experimental":[169],"results":[170],"across":[171,180],"multiple":[172],"benchmarks":[173],"demonstrate":[174],"significantly":[177],"mitigates":[178],"range":[182],"state-of-the-art":[184]},"counts_by_year":[],"updated_date":"2026-03-18T06:31:55.123368","created_date":"2026-03-18T00:00:00"}
