{"id":"https://openalex.org/W7138209648","doi":"https://doi.org/10.1609/aaai.v40i26.39284","title":"UniME-V2: MLLM-as-a-Judge for Universal Multimodal Embedding Learning","display_name":"UniME-V2: MLLM-as-a-Judge for Universal Multimodal Embedding Learning","publication_year":2026,"publication_date":"2026-03-14","ids":{"openalex":"https://openalex.org/W7138209648","doi":"https://doi.org/10.1609/aaai.v40i26.39284"},"language":null,"primary_location":{"id":"doi:10.1609/aaai.v40i26.39284","is_oa":true,"landing_page_url":"https://doi.org/10.1609/aaai.v40i26.39284","pdf_url":"https://ojs.aaai.org/index.php/AAAI/article/download/39284/43245","source":{"id":"https://openalex.org/S4210191458","display_name":"Proceedings of the AAAI Conference on Artificial Intelligence","issn_l":"2159-5399","issn":["2159-5399","2374-3468"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/P4310320058","host_organization_name":"Association for the Advancement of Artificial Intelligence","host_organization_lineage":["https://openalex.org/P4310320058"],"host_organization_lineage_names":["Association for the Advancement of Artificial Intelligence"],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the AAAI Conference on Artificial Intelligence","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"diamond","oa_url":"https://ojs.aaai.org/index.php/AAAI/article/download/39284/43245","any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5129746164","display_name":"Tiancheng Gu","orcid":null},"institutions":[{"id":"https://openalex.org/I129604602","display_name":"The University of Sydney","ror":"https://ror.org/0384j8v12","country_code":"AU","type":"education","lineage":["https://openalex.org/I129604602"]}],"countries":["AU"],"is_corresponding":true,"raw_author_name":"Tiancheng Gu","raw_affiliation_strings":["Miromind AI\nThe University of Sydney"],"affiliations":[{"raw_affiliation_string":"Miromind AI\nThe University of Sydney","institution_ids":["https://openalex.org/I129604602"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5129739379","display_name":"Kaicheng Yang","orcid":null},"institutions":[{"id":"https://openalex.org/I4210150287","display_name":"Urology Team","ror":"https://ror.org/048as6j09","country_code":"US","type":"other","lineage":["https://openalex.org/I4210150287"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Kaicheng Yang","raw_affiliation_strings":["M.R.L Team"],"affiliations":[{"raw_affiliation_string":"M.R.L Team","institution_ids":["https://openalex.org/I4210150287"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5129750439","display_name":"Kaichen Zhang","orcid":null},"institutions":[{"id":"https://openalex.org/I1320687","display_name":"Institute of Materials, Minerals and Mining","ror":"https://ror.org/05e7jqk20","country_code":"GB","type":"facility","lineage":["https://openalex.org/I1320687"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Kaichen Zhang","raw_affiliation_strings":["Miromind AI\nLMMs-Lab Team"],"affiliations":[{"raw_affiliation_string":"Miromind AI\nLMMs-Lab Team","institution_ids":["https://openalex.org/I1320687"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5075570318","display_name":"Xiang An","orcid":"https://orcid.org/0009-0008-4652-8296"},"institutions":[{"id":"https://openalex.org/I4210150287","display_name":"Urology Team","ror":"https://ror.org/048as6j09","country_code":"US","type":"other","lineage":["https://openalex.org/I4210150287"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Xiang An","raw_affiliation_strings":["M.R.L Team"],"affiliations":[{"raw_affiliation_string":"M.R.L Team","institution_ids":["https://openalex.org/I4210150287"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100531299","display_name":"Ziyong Feng","orcid":null},"institutions":[{"id":"https://openalex.org/I4210150287","display_name":"Urology Team","ror":"https://ror.org/048as6j09","country_code":"US","type":"other","lineage":["https://openalex.org/I4210150287"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Ziyong Feng","raw_affiliation_strings":["M.R.L Team"],"affiliations":[{"raw_affiliation_string":"M.R.L Team","institution_ids":["https://openalex.org/I4210150287"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5129679366","display_name":"Yueyi Zhang","orcid":null},"institutions":[{"id":"https://openalex.org/I4210096977","display_name":"OMV Petrom (Romania)","ror":"https://ror.org/00t1n6362","country_code":"RO","type":"company","lineage":["https://openalex.org/I4210096977"]}],"countries":["RO"],"is_corresponding":false,"raw_author_name":"Yueyi Zhang","raw_affiliation_strings":["MiroMind AI"],"affiliations":[{"raw_affiliation_string":"MiroMind AI","institution_ids":["https://openalex.org/I4210096977"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5129649650","display_name":"Weidong Cai","orcid":null},"institutions":[{"id":"https://openalex.org/I129604602","display_name":"The University of Sydney","ror":"https://ror.org/0384j8v12","country_code":"AU","type":"education","lineage":["https://openalex.org/I129604602"]}],"countries":["AU"],"is_corresponding":false,"raw_author_name":"Weidong Cai","raw_affiliation_strings":["The University of Sydney"],"affiliations":[{"raw_affiliation_string":"The University of Sydney","institution_ids":["https://openalex.org/I129604602"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5129729785","display_name":"Jiankang Deng","orcid":null},"institutions":[{"id":"https://openalex.org/I47508984","display_name":"Imperial College London","ror":"https://ror.org/041kmwe10","country_code":"GB","type":"education","lineage":["https://openalex.org/I47508984"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Jiankang Deng","raw_affiliation_strings":["Imperial College London"],"affiliations":[{"raw_affiliation_string":"Imperial College London","institution_ids":["https://openalex.org/I47508984"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5129659720","display_name":"Lidong Bing","orcid":null},"institutions":[{"id":"https://openalex.org/I4210096977","display_name":"OMV Petrom (Romania)","ror":"https://ror.org/00t1n6362","country_code":"RO","type":"company","lineage":["https://openalex.org/I4210096977"]}],"countries":["RO"],"is_corresponding":false,"raw_author_name":"Lidong Bing","raw_affiliation_strings":["MiroMind AI"],"affiliations":[{"raw_affiliation_string":"MiroMind AI","institution_ids":["https://openalex.org/I4210096977"]}]}],"institutions":[],"countries_distinct_count":4,"institutions_distinct_count":9,"corresponding_author_ids":["https://openalex.org/A5129746164"],"corresponding_institution_ids":["https://openalex.org/I129604602"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":true,"cited_by_count":0,"citation_normalized_percentile":{"value":0.4245182,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":"40","issue":"26","first_page":"21378","last_page":"21386"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11273","display_name":"Advanced Graph Neural Networks","score":0.5351999998092651,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11273","display_name":"Advanced Graph Neural Networks","score":0.5351999998092651,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11307","display_name":"Domain Adaptation and Few-Shot Learning","score":0.14589999616146088,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.04390000179409981,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/discriminative-model","display_name":"Discriminative model","score":0.7365999817848206},{"id":"https://openalex.org/keywords/embedding","display_name":"Embedding","score":0.644599974155426},{"id":"https://openalex.org/keywords/leverage","display_name":"Leverage (statistics)","score":0.6037999987602234},{"id":"https://openalex.org/keywords/pairwise-comparison","display_name":"Pairwise comparison","score":0.5953999757766724},{"id":"https://openalex.org/keywords/matching","display_name":"Matching (statistics)","score":0.5440999865531921},{"id":"https://openalex.org/keywords/benchmark","display_name":"Benchmark (surveying)","score":0.5054000020027161},{"id":"https://openalex.org/keywords/set","display_name":"Set (abstract data type)","score":0.49480000138282776},{"id":"https://openalex.org/keywords/semantic-similarity","display_name":"Semantic similarity","score":0.460099995136261}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7771000266075134},{"id":"https://openalex.org/C97931131","wikidata":"https://www.wikidata.org/wiki/Q5282087","display_name":"Discriminative model","level":2,"score":0.7365999817848206},{"id":"https://openalex.org/C41608201","wikidata":"https://www.wikidata.org/wiki/Q980509","display_name":"Embedding","level":2,"score":0.644599974155426},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6442999839782715},{"id":"https://openalex.org/C153083717","wikidata":"https://www.wikidata.org/wiki/Q6535263","display_name":"Leverage (statistics)","level":2,"score":0.6037999987602234},{"id":"https://openalex.org/C184898388","wikidata":"https://www.wikidata.org/wiki/Q1435712","display_name":"Pairwise comparison","level":2,"score":0.5953999757766724},{"id":"https://openalex.org/C165064840","wikidata":"https://www.wikidata.org/wiki/Q1321061","display_name":"Matching (statistics)","level":2,"score":0.5440999865531921},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.527400016784668},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.5054000020027161},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.49480000138282776},{"id":"https://openalex.org/C130318100","wikidata":"https://www.wikidata.org/wiki/Q2268914","display_name":"Semantic similarity","level":2,"score":0.460099995136261},{"id":"https://openalex.org/C103278499","wikidata":"https://www.wikidata.org/wiki/Q254465","display_name":"Similarity (geometry)","level":3,"score":0.43369999527931213},{"id":"https://openalex.org/C2776359362","wikidata":"https://www.wikidata.org/wiki/Q2145286","display_name":"Representation (politics)","level":3,"score":0.40049999952316284},{"id":"https://openalex.org/C2775955345","wikidata":"https://www.wikidata.org/wiki/Q7449071","display_name":"Semantic mapping","level":2,"score":0.3935999870300293},{"id":"https://openalex.org/C174348530","wikidata":"https://www.wikidata.org/wiki/Q188635","display_name":"Bridging (networking)","level":2,"score":0.37619999051094055},{"id":"https://openalex.org/C116834253","wikidata":"https://www.wikidata.org/wiki/Q2039217","display_name":"Identification (biology)","level":2,"score":0.34880000352859497},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.34709998965263367},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.33250001072883606},{"id":"https://openalex.org/C59404180","wikidata":"https://www.wikidata.org/wiki/Q17013334","display_name":"Feature learning","level":2,"score":0.3100999891757965},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.2992999851703644},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.29809999465942383},{"id":"https://openalex.org/C184337299","wikidata":"https://www.wikidata.org/wiki/Q1437428","display_name":"Semantics (computer science)","level":2,"score":0.29109999537467957},{"id":"https://openalex.org/C2777212361","wikidata":"https://www.wikidata.org/wiki/Q5127848","display_name":"Class (philosophy)","level":2,"score":0.27129998803138733},{"id":"https://openalex.org/C2778493491","wikidata":"https://www.wikidata.org/wiki/Q7449072","display_name":"Semantic matching","level":3,"score":0.27070000767707825},{"id":"https://openalex.org/C90312973","wikidata":"https://www.wikidata.org/wiki/Q7449052","display_name":"Semantic data model","level":2,"score":0.2671000063419342}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1609/aaai.v40i26.39284","is_oa":true,"landing_page_url":"https://doi.org/10.1609/aaai.v40i26.39284","pdf_url":"https://ojs.aaai.org/index.php/AAAI/article/download/39284/43245","source":{"id":"https://openalex.org/S4210191458","display_name":"Proceedings of the AAAI Conference on Artificial Intelligence","issn_l":"2159-5399","issn":["2159-5399","2374-3468"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/P4310320058","host_organization_name":"Association for the Advancement of Artificial Intelligence","host_organization_lineage":["https://openalex.org/P4310320058"],"host_organization_lineage_names":["Association for the Advancement of Artificial Intelligence"],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the AAAI Conference on Artificial Intelligence","raw_type":"journal-article"}],"best_oa_location":{"id":"doi:10.1609/aaai.v40i26.39284","is_oa":true,"landing_page_url":"https://doi.org/10.1609/aaai.v40i26.39284","pdf_url":"https://ojs.aaai.org/index.php/AAAI/article/download/39284/43245","source":{"id":"https://openalex.org/S4210191458","display_name":"Proceedings of the AAAI Conference on Artificial Intelligence","issn_l":"2159-5399","issn":["2159-5399","2374-3468"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/P4310320058","host_organization_name":"Association for the Advancement of Artificial Intelligence","host_organization_lineage":["https://openalex.org/P4310320058"],"host_organization_lineage_names":["Association for the Advancement of Artificial Intelligence"],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the AAAI Conference on Artificial Intelligence","raw_type":"journal-article"},"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/10","score":0.7554106116294861,"display_name":"Reduced inequalities"}],"awards":[],"funders":[],"has_content":{"pdf":true,"grobid_xml":true},"content_urls":{"pdf":"https://content.openalex.org/works/W7138209648.pdf","grobid_xml":"https://content.openalex.org/works/W7138209648.grobid-xml"},"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Universal":[0,76],"multimodal":[1],"embedding":[2],"models":[3],"are":[4,145],"essential":[5],"in":[6,41,51],"various":[7],"tasks.":[8,226],"Existing":[9],"approaches":[10],"typically":[11],"use":[12],"in-batch":[13],"mining":[14],"to":[15,31,68,101,150],"identify":[16],"hard":[17,55,86,122,138,195],"negatives":[18,130,196],"by":[19],"measuring":[20],"the":[21,45,62,95,103,126,133,141,152,159,163,169,210],"similarity":[22,160],"of":[23,66,106,128,135],"query-candidate":[24,107],"pairs.":[25],"However,":[26],"these":[27],"methods":[28],"often":[29],"struggle":[30],"capture":[32],"subtle":[33],"semantic":[34,104,112,142,165,172],"differences":[35],"among":[36,174],"candidates":[37],"and":[38,54,72,109,131,201,213],"lack":[39],"diversity":[40],"negative":[42,87,123],"samples.":[43],"Moreover,":[44],"embeddings":[46],"exhibit":[47],"limited":[48],"discriminative":[49,179],"ability":[50],"distinguishing":[52],"false":[53,129],"negatives.":[56,139],"In":[57],"this":[58],"paper,":[59],"we":[60,185],"leverage":[61],"advanced":[63],"understanding":[64],"capabilities":[65],"MLLMs":[67,100],"enhance":[69],"representation":[70],"learning,":[71],"present":[73],"a":[74,84,119,188,198],"novel":[75],"Multimodal":[77],"Embedding(UniME-V2)":[78],"model.":[79],"Our":[80],"approach":[81],"first":[82],"constructs":[83],"potential":[85],"set":[88],"through":[89,197],"global":[90],"retrieval.":[91],"We":[92,205],"then":[93],"introduce":[94],"MLLM-as-a-Judge":[96],"mechanism,":[97],"which":[98],"utilizes":[99],"assess":[102],"alignment":[105],"pairs":[108],"generate":[110],"soft":[111,148,164],"matching":[113,143,166],"scores.":[114],"These":[115],"scores":[116,144],"serve":[117],"as":[118,147],"foundation":[120],"for":[121],"mining,":[124],"mitigating":[125],"impact":[127],"enabling":[132],"identification":[134],"diverse,":[136],"high-quality":[137],"Furthermore,":[140],"used":[146],"labels":[149],"mitigate":[151],"rigid":[153],"one-to-one":[154],"mapping":[155],"constraint.":[156],"By":[157],"aligning":[158],"matrix":[161],"with":[162],"score":[167],"matrix,":[168],"model":[170,190],"learns":[171],"distinctions":[173],"candidates,":[175],"significantly":[176],"enhancing":[177],"its":[178],"capacity.":[180],"To":[181],"further":[182],"improve":[183],"performance,":[184],"propose":[186],"UniME-V2,":[187],"reranking":[189],"trained":[191],"on":[192,209],"our":[193,219],"mined":[194],"joint":[199],"pairwise":[200],"listwise":[202],"optimization":[203],"approach.":[204],"conduct":[206],"comprehensive":[207],"experiments":[208],"MMEB":[211],"benchmark":[212],"multiple":[214],"retrieval":[215],"tasks,":[216],"demonstrating":[217],"that":[218],"method":[220],"achieves":[221],"state-of-the-art":[222],"performance":[223],"across":[224],"all":[225]},"counts_by_year":[],"updated_date":"2026-03-20T20:47:17.329874","created_date":"2026-03-18T00:00:00"}
