{"id":"https://openalex.org/W4415538402","doi":"https://doi.org/10.1145/3746027.3755364","title":"Debiasing Multimodal Large Language Models via Penalization of Language Priors","display_name":"Debiasing Multimodal Large Language Models via Penalization of Language Priors","publication_year":2025,"publication_date":"2025-10-25","ids":{"openalex":"https://openalex.org/W4415538402","doi":"https://doi.org/10.1145/3746027.3755364"},"language":null,"primary_location":{"id":"doi:10.1145/3746027.3755364","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3746027.3755364","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 33rd ACM International Conference on Multimedia","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5100376940","display_name":"Yifan Zhang","orcid":"https://orcid.org/0000-0002-6227-0183"},"institutions":[{"id":"https://openalex.org/I19820366","display_name":"Chinese Academy of Sciences","ror":"https://ror.org/034t30j35","country_code":"CN","type":"government","lineage":["https://openalex.org/I19820366"]},{"id":"https://openalex.org/I4210094879","display_name":"Shandong Institute of Automation","ror":"https://ror.org/00qdtba35","country_code":"CN","type":"facility","lineage":["https://openalex.org/I4210094879","https://openalex.org/I4210142748"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"YiFan Zhang","raw_affiliation_strings":["Institute of Automation, Chinese Academy of Sciences, Beijing, China"],"raw_orcid":"https://orcid.org/0000-0002-6227-0183","affiliations":[{"raw_affiliation_string":"Institute of Automation, Chinese Academy of Sciences, Beijing, China","institution_ids":["https://openalex.org/I4210094879","https://openalex.org/I19820366"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Yang Shi","orcid":"https://orcid.org/0009-0003-9241-236X"},"institutions":[{"id":"https://openalex.org/I20231570","display_name":"Peking University","ror":"https://ror.org/02v51f717","country_code":"CN","type":"education","lineage":["https://openalex.org/I20231570"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yang Shi","raw_affiliation_strings":["Peking University, Beijing, China"],"raw_orcid":"https://orcid.org/0009-0003-9241-236X","affiliations":[{"raw_affiliation_string":"Peking University, Beijing, China","institution_ids":["https://openalex.org/I20231570"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5108259382","display_name":"Weichen Yu","orcid":"https://orcid.org/0009-0003-7935-2358"},"institutions":[{"id":"https://openalex.org/I74973139","display_name":"Carnegie Mellon University","ror":"https://ror.org/05x2bcf33","country_code":"US","type":"education","lineage":["https://openalex.org/I74973139"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Weichen Yu","raw_affiliation_strings":["Carnegie Mellon University, Pittsburgh, USA"],"raw_orcid":"https://orcid.org/0009-0003-7935-2358","affiliations":[{"raw_affiliation_string":"Carnegie Mellon University, Pittsburgh, USA","institution_ids":["https://openalex.org/I74973139"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5048346353","display_name":"Qingsong Wen","orcid":"https://orcid.org/0000-0003-4516-2524"},"institutions":[{"id":"https://openalex.org/I58064216","display_name":"Bellevue University","ror":"https://ror.org/00b7ckn36","country_code":"US","type":"education","lineage":["https://openalex.org/I58064216"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Qingsong Wen","raw_affiliation_strings":["Squirrel AI Learning, Bellevue, USA"],"raw_orcid":"https://orcid.org/0000-0003-4516-2524","affiliations":[{"raw_affiliation_string":"Squirrel AI Learning, Bellevue, USA","institution_ids":["https://openalex.org/I58064216"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101440776","display_name":"Xue Wang","orcid":"https://orcid.org/0009-0004-2296-9688"},"institutions":[{"id":"https://openalex.org/I45928872","display_name":"Alibaba Group (China)","ror":"https://ror.org/00k642b80","country_code":"CN","type":"company","lineage":["https://openalex.org/I45928872"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xue Wang","raw_affiliation_strings":["Alibaba Group, Beijing, China"],"raw_orcid":"https://orcid.org/0009-0004-2296-9688","affiliations":[{"raw_affiliation_string":"Alibaba Group, Beijing, China","institution_ids":["https://openalex.org/I45928872"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5016603628","display_name":"Wenjing Yang","orcid":"https://orcid.org/0000-0002-6997-0406"},"institutions":[{"id":"https://openalex.org/I170215575","display_name":"National University of Defense Technology","ror":"https://ror.org/05d2yfz11","country_code":"CN","type":"education","lineage":["https://openalex.org/I170215575"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Wenjing Yang","raw_affiliation_strings":["National University of Defense Technology, Changsha, China"],"raw_orcid":"https://orcid.org/0000-0002-6997-0406","affiliations":[{"raw_affiliation_string":"National University of Defense Technology, Changsha, China","institution_ids":["https://openalex.org/I170215575"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100599309","display_name":"Zhang Zhang","orcid":"https://orcid.org/0000-0001-9425-3065"},"institutions":[{"id":"https://openalex.org/I19820366","display_name":"Chinese Academy of Sciences","ror":"https://ror.org/034t30j35","country_code":"CN","type":"government","lineage":["https://openalex.org/I19820366"]},{"id":"https://openalex.org/I4210094879","display_name":"Shandong Institute of Automation","ror":"https://ror.org/00qdtba35","country_code":"CN","type":"facility","lineage":["https://openalex.org/I4210094879","https://openalex.org/I4210142748"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Zhang Zhang","raw_affiliation_strings":["Institute of Automation, Chinese Academy of Sciences, Beijing, China"],"raw_orcid":"https://orcid.org/0000-0001-9425-3065","affiliations":[{"raw_affiliation_string":"Institute of Automation, Chinese Academy of Sciences, Beijing, China","institution_ids":["https://openalex.org/I4210094879","https://openalex.org/I19820366"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5115602506","display_name":"Liang Wang","orcid":"https://orcid.org/0000-0001-5224-8647"},"institutions":[{"id":"https://openalex.org/I19820366","display_name":"Chinese Academy of Sciences","ror":"https://ror.org/034t30j35","country_code":"CN","type":"government","lineage":["https://openalex.org/I19820366"]},{"id":"https://openalex.org/I4210094879","display_name":"Shandong Institute of Automation","ror":"https://ror.org/00qdtba35","country_code":"CN","type":"facility","lineage":["https://openalex.org/I4210094879","https://openalex.org/I4210142748"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Liang Wang","raw_affiliation_strings":["Institute of Automation, Chinese Academy of Sciences, Beijing, China"],"raw_orcid":"https://orcid.org/0000-0001-5224-8647","affiliations":[{"raw_affiliation_string":"Institute of Automation, Chinese Academy of Sciences, Beijing, China","institution_ids":["https://openalex.org/I4210094879","https://openalex.org/I19820366"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5069394608","display_name":"Rong Jin","orcid":"https://orcid.org/0000-0002-8797-4646"},"institutions":[{"id":"https://openalex.org/I58610484","display_name":"Seattle University","ror":"https://ror.org/02jqc0m91","country_code":"US","type":"education","lineage":["https://openalex.org/I58610484"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Rong Jin","raw_affiliation_strings":["Meta, Seattle, USA"],"raw_orcid":"https://orcid.org/0000-0002-8797-4646","affiliations":[{"raw_affiliation_string":"Meta, Seattle, USA","institution_ids":["https://openalex.org/I58610484"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":9,"corresponding_author_ids":["https://openalex.org/A5100376940"],"corresponding_institution_ids":["https://openalex.org/I19820366","https://openalex.org/I4210094879"],"apc_list":null,"apc_paid":null,"fwci":2.1406,"has_fulltext":false,"cited_by_count":1,"citation_normalized_percentile":{"value":0.90832173,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":91,"max":95},"biblio":{"volume":null,"issue":null,"first_page":"4232","last_page":"4241"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9995999932289124,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9965000152587891,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/debiasing","display_name":"Debiasing","score":0.7825999855995178},{"id":"https://openalex.org/keywords/prior-probability","display_name":"Prior probability","score":0.6575000286102295},{"id":"https://openalex.org/keywords/regularization","display_name":"Regularization (linguistics)","score":0.5167999863624573},{"id":"https://openalex.org/keywords/focus","display_name":"Focus (optics)","score":0.492900013923645},{"id":"https://openalex.org/keywords/language-model","display_name":"Language model","score":0.4221999943256378},{"id":"https://openalex.org/keywords/natural-language-generation","display_name":"Natural language generation","score":0.37070000171661377},{"id":"https://openalex.org/keywords/natural-language","display_name":"Natural language","score":0.359499990940094},{"id":"https://openalex.org/keywords/security-token","display_name":"Security token","score":0.3248000144958496}],"concepts":[{"id":"https://openalex.org/C2779458634","wikidata":"https://www.wikidata.org/wiki/Q24963715","display_name":"Debiasing","level":2,"score":0.7825999855995178},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7358999848365784},{"id":"https://openalex.org/C177769412","wikidata":"https://www.wikidata.org/wiki/Q278090","display_name":"Prior probability","level":3,"score":0.6575000286102295},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5443999767303467},{"id":"https://openalex.org/C2776135515","wikidata":"https://www.wikidata.org/wiki/Q17143721","display_name":"Regularization (linguistics)","level":2,"score":0.5167999863624573},{"id":"https://openalex.org/C192209626","wikidata":"https://www.wikidata.org/wiki/Q190909","display_name":"Focus (optics)","level":2,"score":0.492900013923645},{"id":"https://openalex.org/C137293760","wikidata":"https://www.wikidata.org/wiki/Q3621696","display_name":"Language model","level":2,"score":0.4221999943256378},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.4081000089645386},{"id":"https://openalex.org/C2776187449","wikidata":"https://www.wikidata.org/wiki/Q1513879","display_name":"Natural language generation","level":3,"score":0.37070000171661377},{"id":"https://openalex.org/C195324797","wikidata":"https://www.wikidata.org/wiki/Q33742","display_name":"Natural language","level":2,"score":0.359499990940094},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.34549999237060547},{"id":"https://openalex.org/C48145219","wikidata":"https://www.wikidata.org/wiki/Q1335365","display_name":"Security token","level":2,"score":0.3248000144958496},{"id":"https://openalex.org/C100279451","wikidata":"https://www.wikidata.org/wiki/Q372193","display_name":"Perplexity","level":3,"score":0.31700000166893005},{"id":"https://openalex.org/C44291984","wikidata":"https://www.wikidata.org/wiki/Q1074173","display_name":"Question answering","level":2,"score":0.3050000071525574},{"id":"https://openalex.org/C120936955","wikidata":"https://www.wikidata.org/wiki/Q2155640","display_name":"Empirical research","level":2,"score":0.29670000076293945},{"id":"https://openalex.org/C2777508537","wikidata":"https://www.wikidata.org/wiki/Q7936620","display_name":"Visual reasoning","level":2,"score":0.29649999737739563},{"id":"https://openalex.org/C160234255","wikidata":"https://www.wikidata.org/wiki/Q812535","display_name":"Bayesian inference","level":3,"score":0.29179999232292175},{"id":"https://openalex.org/C92757383","wikidata":"https://www.wikidata.org/wiki/Q382497","display_name":"Affine transformation","level":2,"score":0.29089999198913574},{"id":"https://openalex.org/C157657479","wikidata":"https://www.wikidata.org/wiki/Q2367247","display_name":"Closed captioning","level":3,"score":0.2906000018119812},{"id":"https://openalex.org/C2779439875","wikidata":"https://www.wikidata.org/wiki/Q1078276","display_name":"Natural language understanding","level":3,"score":0.28610000014305115},{"id":"https://openalex.org/C2776214188","wikidata":"https://www.wikidata.org/wiki/Q408386","display_name":"Inference","level":2,"score":0.28299999237060547},{"id":"https://openalex.org/C2983448237","wikidata":"https://www.wikidata.org/wiki/Q1078276","display_name":"Language understanding","level":2,"score":0.2718999981880188},{"id":"https://openalex.org/C180747234","wikidata":"https://www.wikidata.org/wiki/Q23373","display_name":"Cognitive psychology","level":1,"score":0.2660999894142151},{"id":"https://openalex.org/C2776608160","wikidata":"https://www.wikidata.org/wiki/Q4785462","display_name":"Natural (archaeology)","level":2,"score":0.25270000100135803}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3746027.3755364","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3746027.3755364","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 33rd ACM International Conference on Multimedia","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":3,"referenced_works":["https://openalex.org/W2981884310","https://openalex.org/W4389523675","https://openalex.org/W4394862623"],"related_works":[],"abstract_inverted_index":{"In":[0],"the":[1,37,45,49,57,63,76,92,127,137,149,190,215,224,246],"realms":[2],"of":[3,48,65,78,151,192,201,217,226,248],"computer":[4],"vision":[5],"and":[6,90,251],"natural":[7],"language":[8],"processing,":[9],"Multimodal":[10],"Large":[11,51],"Language":[12,52],"Models":[13,53],"(MLLMs)":[14],"have":[15],"become":[16],"indispensable":[17],"tools,":[18],"proficient":[19],"in":[20,75,230,239],"generating":[21],"textual":[22],"responses":[23],"based":[24],"on":[25,176,189],"visual":[26,84,96],"inputs.":[27,85],"Despite":[28],"their":[29],"advancements,":[30],"our":[31,185,227],"investigation":[32,186],"reveals":[33],"a":[34,116,177,181],"noteworthy":[35],"bias:":[36],"generated":[38],"content":[39],"is":[40,139],"often":[41,70],"driven":[42],"more":[43,155,249],"by":[44,56,171],"inherent":[46],"priors":[47],"underlying":[50],"(LLMs)":[54],"than":[55],"input":[58],"image.":[59],"Empirical":[60],"experiments":[61,222],"underscore":[62],"persistence":[64],"this":[66,162],"bias,":[67],"as":[68,108,142],"MLLMs":[69,193],"provide":[71],"confident":[72],"answers":[73],"even":[74],"absence":[77],"relevant":[79],"images":[80],"or":[81,110],"given":[82],"incongruent":[83],"To":[86],"rectify":[87],"these":[88],"biases":[89],"redirect":[91],"model's":[93],"focus":[94],"toward":[95],"information,":[97],"we":[98,114,160,204],"propose":[99],"two":[100],"simple,":[101],"training-free":[102],"strategies.":[103],"First,":[104],"for":[105],"tasks":[106],"such":[107],"classification":[109],"multi-choice":[111],"question":[112],"answering,":[113],"introduce":[115],"''Post-Hoc":[117],"Debias''":[118],"method":[119,163],"using":[120],"an":[121,143],"affine":[122],"calibration":[123],"step":[124],"to":[125,147,164,245],"adjust":[126],"output":[128],"distribution.":[129],"This":[130],"approach":[131],"ensures":[132],"uniform":[133],"answer":[134],"scores":[135],"when":[136],"image":[138,179],"absent,":[140],"acting":[141],"effective":[144],"regularization":[145],"technique":[146],"alleviate":[148],"influence":[150],"LLM":[152],"priors.":[153],"For":[154],"intricate":[156],"open-ended":[157],"generation":[158,247],"tasks,":[159],"extend":[161],"''Visual":[165],"Debias":[166],"Decoding'',":[167],"which":[168],"mitigates":[169],"bias":[170],"contrasting":[172],"token":[173],"log-probabilities":[174],"conditioned":[175],"correct":[178],"versus":[180],"meaningless":[182],"one.":[183],"Additionally,":[184],"sheds":[187],"light":[188],"instability":[191],"across":[194],"various":[195],"decoding":[196],"configurations.":[197],"Through":[198],"systematic":[199],"exploration":[200],"different":[202],"settings,":[203],"achieve":[205],"significant":[206],"performance":[207],"improvements-surpassing":[208],"previously":[209],"reported":[210],"results-and":[211],"raise":[212],"concerns":[213],"about":[214],"fairness":[216],"current":[218],"evaluation":[219],"practices.":[220],"Comprehensive":[221],"substantiate":[223],"effectiveness":[225],"proposed":[228],"strategies":[229,234],"mitigating":[231],"biases.":[232],"These":[233],"not":[235],"only":[236],"prove":[237],"beneficial":[238],"minimizing":[240],"hallucinations":[241],"but":[242],"also":[243],"contribute":[244],"helpful":[250],"precise":[252],"illustrations.":[253]},"counts_by_year":[{"year":2025,"cited_by_count":1}],"updated_date":"2026-04-28T14:05:53.105641","created_date":"2025-10-25T00:00:00"}
