{"id":"https://openalex.org/W7160308046","doi":"https://doi.org/10.1109/wacv61042.2026.00290","title":"Scalpel: Fine-Grained Alignment of Attention Activation Manifolds via Mixture Gaussian Bridges to Mitigate Multimodal Hallucination","display_name":"Scalpel: Fine-Grained Alignment of Attention Activation Manifolds via Mixture Gaussian Bridges to Mitigate Multimodal Hallucination","publication_year":2026,"publication_date":"2026-03-06","ids":{"openalex":"https://openalex.org/W7160308046","doi":"https://doi.org/10.1109/wacv61042.2026.00290"},"language":null,"primary_location":{"id":"doi:10.1109/wacv61042.2026.00290","is_oa":false,"landing_page_url":"https://doi.org/10.1109/wacv61042.2026.00290","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2026 IEEE/CVF Winter Conference on Applications of Computer Vision (WACV)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5018232553","display_name":"Ziqiang Shi","orcid":"https://orcid.org/0000-0002-3105-6213"},"institutions":[{"id":"https://openalex.org/I4210159607","display_name":"Fujitsu (China)","ror":"https://ror.org/04w4yzw62","country_code":"CN","type":"company","lineage":["https://openalex.org/I4210159607"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Ziqiang Shi","raw_affiliation_strings":["Fujitsu Research &amp; Development Center Co.,LTD.,Beijing,China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Fujitsu Research &amp; Development Center Co.,LTD.,Beijing,China","institution_ids":["https://openalex.org/I4210159607"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5135418554","display_name":"Rujie Liu","orcid":null},"institutions":[{"id":"https://openalex.org/I4210159607","display_name":"Fujitsu (China)","ror":"https://ror.org/04w4yzw62","country_code":"CN","type":"company","lineage":["https://openalex.org/I4210159607"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Rujie Liu","raw_affiliation_strings":["Fujitsu Research &amp; Development Center Co.,LTD.,Beijing,China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Fujitsu Research &amp; Development Center Co.,LTD.,Beijing,China","institution_ids":["https://openalex.org/I4210159607"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5103187328","display_name":"Shanshan Yu","orcid":"https://orcid.org/0000-0002-5508-5633"},"institutions":[{"id":"https://openalex.org/I2252096349","display_name":"Fujitsu (Japan)","ror":"https://ror.org/038e2g226","country_code":"JP","type":"company","lineage":["https://openalex.org/I2252096349"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Shanshan Yu","raw_affiliation_strings":["Fujitsu Limited,Tokyo,Japan"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Fujitsu Limited,Tokyo,Japan","institution_ids":["https://openalex.org/I2252096349"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5015843901","display_name":"Satoshi Munakata","orcid":null},"institutions":[{"id":"https://openalex.org/I2252096349","display_name":"Fujitsu (Japan)","ror":"https://ror.org/038e2g226","country_code":"JP","type":"company","lineage":["https://openalex.org/I2252096349"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Satoshi Munakata","raw_affiliation_strings":["Fujitsu Limited,Tokyo,Japan"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Fujitsu Limited,Tokyo,Japan","institution_ids":["https://openalex.org/I2252096349"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5135318949","display_name":"Koichi Shirahata","orcid":null},"institutions":[{"id":"https://openalex.org/I2252096349","display_name":"Fujitsu (Japan)","ror":"https://ror.org/038e2g226","country_code":"JP","type":"company","lineage":["https://openalex.org/I2252096349"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Koichi Shirahata","raw_affiliation_strings":["Fujitsu Limited,Tokyo,Japan"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Fujitsu Limited,Tokyo,Japan","institution_ids":["https://openalex.org/I2252096349"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":5,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.71419478,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"2964","last_page":"2973"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12553","display_name":"Psychedelics and Drug Studies","score":0.18400000035762787,"subfield":{"id":"https://openalex.org/subfields/3203","display_name":"Clinical Psychology"},"field":{"id":"https://openalex.org/fields/32","display_name":"Psychology"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},"topics":[{"id":"https://openalex.org/T12553","display_name":"Psychedelics and Drug Studies","score":0.18400000035762787,"subfield":{"id":"https://openalex.org/subfields/3203","display_name":"Clinical Psychology"},"field":{"id":"https://openalex.org/fields/32","display_name":"Psychology"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T10241","display_name":"Functional Brain Connectivity Studies","score":0.10809999704360962,"subfield":{"id":"https://openalex.org/subfields/2805","display_name":"Cognitive Neuroscience"},"field":{"id":"https://openalex.org/fields/28","display_name":"Neuroscience"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},{"id":"https://openalex.org/T10667","display_name":"Emotion and Mood Recognition","score":0.04619999974966049,"subfield":{"id":"https://openalex.org/subfields/3205","display_name":"Experimental and Cognitive Psychology"},"field":{"id":"https://openalex.org/fields/32","display_name":"Psychology"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/gaussian","display_name":"Gaussian","score":0.4180999994277954},{"id":"https://openalex.org/keywords/mixture-model","display_name":"Mixture model","score":0.3926999866962433},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.3476000130176544},{"id":"https://openalex.org/keywords/noise","display_name":"Noise (video)","score":0.32820001244544983},{"id":"https://openalex.org/keywords/gaussian-process","display_name":"Gaussian process","score":0.29649999737739563}],"concepts":[{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5259000062942505},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.5189999938011169},{"id":"https://openalex.org/C163716315","wikidata":"https://www.wikidata.org/wiki/Q901177","display_name":"Gaussian","level":2,"score":0.4180999994277954},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.41190001368522644},{"id":"https://openalex.org/C61224824","wikidata":"https://www.wikidata.org/wiki/Q2260434","display_name":"Mixture model","level":2,"score":0.3926999866962433},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.367900013923645},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.3476000130176544},{"id":"https://openalex.org/C99498987","wikidata":"https://www.wikidata.org/wiki/Q2210247","display_name":"Noise (video)","level":3,"score":0.32820001244544983},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.32670000195503235},{"id":"https://openalex.org/C61326573","wikidata":"https://www.wikidata.org/wiki/Q1496376","display_name":"Gaussian process","level":3,"score":0.29649999737739563},{"id":"https://openalex.org/C529865628","wikidata":"https://www.wikidata.org/wiki/Q1790740","display_name":"Manifold (fluid mechanics)","level":2,"score":0.2745000123977661},{"id":"https://openalex.org/C2776359362","wikidata":"https://www.wikidata.org/wiki/Q2145286","display_name":"Representation (politics)","level":3,"score":0.2696000039577484},{"id":"https://openalex.org/C106131492","wikidata":"https://www.wikidata.org/wiki/Q3072260","display_name":"Filter (signal processing)","level":2,"score":0.251800000667572}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/wacv61042.2026.00290","is_oa":false,"landing_page_url":"https://doi.org/10.1109/wacv61042.2026.00290","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2026 IEEE/CVF Winter Conference on Applications of Computer Vision (WACV)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":20,"referenced_works":["https://openalex.org/W1861492603","https://openalex.org/W2005308357","https://openalex.org/W2165880886","https://openalex.org/W2963170010","https://openalex.org/W2963518342","https://openalex.org/W4206471589","https://openalex.org/W4285149215","https://openalex.org/W4312846625","https://openalex.org/W4385245566","https://openalex.org/W4389523832","https://openalex.org/W4402670859","https://openalex.org/W4402726945","https://openalex.org/W4402727851","https://openalex.org/W4402753774","https://openalex.org/W4413144618","https://openalex.org/W4413146139","https://openalex.org/W4415798455","https://openalex.org/W7117297007","https://openalex.org/W7133193597","https://openalex.org/W7133235880"],"related_works":[],"abstract_inverted_index":{"Rapid":[0],"progress":[1],"in":[2,11,69,90],"large":[3,21],"vision-language":[4,12],"models":[5,23],"(LVLMs)":[6],"has":[7],"achieved":[8],"unprecedented":[9],"performance":[10],"tasks.":[13],"However,":[14],"due":[15],"to":[16,84,101,105],"the":[17],"strong":[18],"prior":[19],"of":[20,88],"language":[22],"(LLMs)":[24],"and":[25,74,92,95,117,123,128,136,147],"misaligned":[26],"attention":[27,54,64,89],"across":[28,133],"modalities,":[29],"LVLMs":[30],"often":[31],"generate":[32],"outputs":[33],"inconsistent":[34],"with":[35],"visual":[36],"content":[37],"-":[38],"termed":[39],"hallucination.":[40],"To":[41],"address":[42],"this,":[43],"we":[44],"propose":[45],"Scalpel,":[46],"a":[47,80,161],"method":[48],"that":[49,139],"reduces":[50],"hallucination":[51,93,127],"by":[52],"refining":[53],"activation":[55],"distributions":[56,87],"toward":[57],"more":[58],"credible":[59],"regions.":[60],"Scalpel":[61,112,140,152],"predicts":[62],"trusted":[63],"directions":[65],"for":[66],"each":[67],"head":[68],"Transformer":[70],"layers":[71],"during":[72],"inference":[73],"adjusts":[75,114],"activations":[76],"accordingly.":[77],"It":[78],"employs":[79],"Gaussian":[81,107],"mixture":[82],"model":[83],"capture":[85],"multi-peak":[86],"trust":[91,129],"manifolds,":[94],"uses":[96],"entropic":[97],"optimal":[98],"transport":[99],"(equivalent":[100],"Schr\u00f6dinger":[102],"bridge":[103],"problem)":[104],"map":[106],"components":[108],"precisely.":[109],"During":[110],"mitigation,":[111],"dynamically":[113],"intervention":[115],"strength":[116],"direction":[118],"based":[119],"on":[120],"component":[121],"membership":[122],"mapping":[124],"relationships":[125],"between":[126],"activations.":[130],"Extensive":[131],"experiments":[132],"multiple":[134],"datasets":[135],"benchmarks":[137],"demonstrate":[138],"effectively":[141],"mitigates":[142],"hallucinations,":[143],"outperforming":[144],"previous":[145],"methods":[146],"achieving":[148],"state-of-the-art":[149],"performance.":[150],"Moreover,":[151],"is":[153],"model-and":[154],"data-agnostic,":[155],"requiring":[156],"no":[157],"additional":[158],"computation,":[159],"only":[160],"single":[162],"decoding":[163],"step.":[164]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2026-05-06T00:00:00"}
