{"id":"https://openalex.org/W4417070846","doi":"https://doi.org/10.1109/iccv51701.2025.01600","title":"Calibrating MLLM-as-a-judge via Multimodal Bayesian Prompt Ensembles","display_name":"Calibrating MLLM-as-a-judge via Multimodal Bayesian Prompt Ensembles","publication_year":2025,"publication_date":"2025-10-19","ids":{"openalex":"https://openalex.org/W4417070846","doi":"https://doi.org/10.1109/iccv51701.2025.01600"},"language":"en","primary_location":{"id":"doi:10.1109/iccv51701.2025.01600","is_oa":false,"landing_page_url":"https://doi.org/10.1109/iccv51701.2025.01600","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 IEEE/CVF International Conference on Computer Vision (ICCV)","raw_type":"proceedings-article"},"type":"article","indexed_in":["arxiv","crossref","datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/2509.08777","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5054164497","display_name":"Eric Slyman","orcid":"https://orcid.org/0000-0002-2481-7942"},"institutions":[{"id":"https://openalex.org/I1306409833","display_name":"Adobe Systems (United States)","ror":"https://ror.org/059tvcg64","country_code":"US","type":"company","lineage":["https://openalex.org/I1306409833"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Eric Slyman","raw_affiliation_strings":["Adobe Systems"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Adobe Systems","institution_ids":["https://openalex.org/I1306409833"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5115560745","display_name":"Mehrab Tanjim","orcid":null},"institutions":[{"id":"https://openalex.org/I1306409833","display_name":"Adobe Systems (United States)","ror":"https://ror.org/059tvcg64","country_code":"US","type":"company","lineage":["https://openalex.org/I1306409833"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Mehrab Tanjim","raw_affiliation_strings":["Adobe Systems"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Adobe Systems","institution_ids":["https://openalex.org/I1306409833"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5047193437","display_name":"Kushal Kafle","orcid":"https://orcid.org/0000-0002-0847-7861"},"institutions":[{"id":"https://openalex.org/I1306409833","display_name":"Adobe Systems (United States)","ror":"https://ror.org/059tvcg64","country_code":"US","type":"company","lineage":["https://openalex.org/I1306409833"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Kushal Kafle","raw_affiliation_strings":["Adobe Systems"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Adobe Systems","institution_ids":["https://openalex.org/I1306409833"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5051259505","display_name":"Stefan Lee","orcid":"https://orcid.org/0000-0001-5953-1963"},"institutions":[{"id":"https://openalex.org/I131249849","display_name":"Oregon State University","ror":"https://ror.org/00ysfqy60","country_code":"US","type":"education","lineage":["https://openalex.org/I131249849"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Stefan Lee","raw_affiliation_strings":["Oregon State University"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Oregon State University","institution_ids":["https://openalex.org/I131249849"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5054164497"],"corresponding_institution_ids":["https://openalex.org/I1306409833"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.36836087,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"17224","last_page":"17234"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.6712999939918518,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.6712999939918518,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.06849999725818634,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10775","display_name":"Generative Adversarial Networks and Image Synthesis","score":0.0357000008225441,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/pairwise-comparison","display_name":"Pairwise comparison","score":0.6276999711990356},{"id":"https://openalex.org/keywords/bayesian-probability","display_name":"Bayesian probability","score":0.6157000064849854},{"id":"https://openalex.org/keywords/calibration","display_name":"Calibration","score":0.5189999938011169},{"id":"https://openalex.org/keywords/image","display_name":"Image (mathematics)","score":0.4528999924659729},{"id":"https://openalex.org/keywords/path","display_name":"Path (computing)","score":0.4336000084877014},{"id":"https://openalex.org/keywords/preference","display_name":"Preference","score":0.37459999322891235}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7817999720573425},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6782000064849854},{"id":"https://openalex.org/C184898388","wikidata":"https://www.wikidata.org/wiki/Q1435712","display_name":"Pairwise comparison","level":2,"score":0.6276999711990356},{"id":"https://openalex.org/C107673813","wikidata":"https://www.wikidata.org/wiki/Q812534","display_name":"Bayesian probability","level":2,"score":0.6157000064849854},{"id":"https://openalex.org/C165838908","wikidata":"https://www.wikidata.org/wiki/Q736777","display_name":"Calibration","level":2,"score":0.5189999938011169},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.45489999651908875},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.4528999924659729},{"id":"https://openalex.org/C2777735758","wikidata":"https://www.wikidata.org/wiki/Q817765","display_name":"Path (computing)","level":2,"score":0.4336000084877014},{"id":"https://openalex.org/C2781249084","wikidata":"https://www.wikidata.org/wiki/Q908656","display_name":"Preference","level":2,"score":0.37459999322891235},{"id":"https://openalex.org/C2778049539","wikidata":"https://www.wikidata.org/wiki/Q17002908","display_name":"Bayesian optimization","level":2,"score":0.3659000098705292},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.3555000126361847},{"id":"https://openalex.org/C160234255","wikidata":"https://www.wikidata.org/wiki/Q812535","display_name":"Bayesian inference","level":3,"score":0.33550000190734863},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.26739999651908875}],"mesh":[],"locations_count":3,"locations":[{"id":"doi:10.1109/iccv51701.2025.01600","is_oa":false,"landing_page_url":"https://doi.org/10.1109/iccv51701.2025.01600","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 IEEE/CVF International Conference on Computer Vision (ICCV)","raw_type":"proceedings-article"},{"id":"pmh:oai:arXiv.org:2509.08777","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2509.08777","pdf_url":"https://arxiv.org/pdf/2509.08777","source":{"id":"https://openalex.org/S4393918464","display_name":"ArXiv.org","issn_l":"2331-8422","issn":["2331-8422"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},{"id":"doi:10.48550/arxiv.2509.08777","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2509.08777","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:2509.08777","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2509.08777","pdf_url":"https://arxiv.org/pdf/2509.08777","source":{"id":"https://openalex.org/S4393918464","display_name":"ArXiv.org","issn_l":"2331-8422","issn":["2331-8422"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Multimodal":[0,78],"large":[1],"language":[2],"models":[3,26],"(MLLMs)":[4],"are":[5],"increasingly":[6],"used":[7],"to":[8,61,98,128],"evaluate":[9],"text-to-image":[10],"(TTI)":[11],"generation":[12],"systems,":[13],"providing":[14],"automated":[15],"judgments":[16,120],"based":[17,103],"on":[18,104,136],"visual":[19,106],"and":[20,32,121,141,152,169],"textual":[21],"context.":[22],"However,":[23],"these":[24,47,69],"\"judge\"":[25],"often":[27],"suffer":[28],"from":[29],"biases,":[30],"overconfidence,":[31],"inconsistent":[33],"performance":[34],"across":[35,154],"diverse":[36],"image":[37,93,156],"domains.":[38],"While":[39],"prompt":[40,88,101],"ensembling":[41,58],"has":[42],"shown":[43],"promise":[44],"for":[45,64,166,175],"mitigating":[46],"issues":[48],"in":[49,117,147],"unimodal,":[50],"text-only":[51],"settings,":[52],"our":[53],"experiments":[54],"reveal":[55],"that":[56,113],"standard":[57],"methods":[59],"fail":[60],"generalize":[62],"effectively":[63],"TTI":[65,138,178],"tasks.":[66],"To":[67],"address":[68],"limitations,":[70],"we":[71],"propose":[72],"a":[73,86,171],"new":[74],"multimodal-aware":[75],"method":[76,84],"called":[77],"Mixture-of-Bayesian":[79],"Prompt":[80],"Ensembles":[81],"(MMB).":[82],"Our":[83,158],"uses":[85],"Bayesian":[87],"ensemble":[89],"approach":[90],"augmented":[91],"by":[92],"clustering,":[94],"allowing":[95],"the":[96,105,130,161],"judge":[97,167],"dynamically":[99],"assign":[100],"weights":[102],"characteristics":[107],"of":[108,163],"each":[109],"sample.":[110],"We":[111],"show":[112],"MMB":[114,143],"improves":[115],"accuracy":[116],"pairwise":[118],"preference":[119],"greatly":[122],"enhances":[123],"calibration,":[124],"making":[125],"it":[126],"easier":[127],"gauge":[129],"judge's":[131],"true":[132],"uncertainty.":[133],"In":[134],"evaluations":[135],"two":[137],"benchmarks,":[139],"HPSv2":[140],"MJBench,":[142],"outperforms":[144],"existing":[145],"baselines":[146],"alignment":[148],"with":[149],"human":[150],"annotations":[151],"calibration":[153,168],"varied":[155],"content.":[157],"findings":[159],"highlight":[160],"importance":[162],"multimodal-specific":[164],"strategies":[165],"suggest":[170],"promising":[172],"path":[173],"forward":[174],"reliable":[176],"large-scale":[177],"evaluation.":[179]},"counts_by_year":[],"updated_date":"2026-05-06T06:03:25.996018","created_date":"2025-10-10T00:00:00"}
