{"id":"https://openalex.org/W7138340886","doi":"https://doi.org/10.1609/aaai.v40i31.39788","title":"CrossCheck-Bench: Diagnosing Compositional Failures in Multimodal Conflict Resolution","display_name":"CrossCheck-Bench: Diagnosing Compositional Failures in Multimodal Conflict Resolution","publication_year":2026,"publication_date":"2026-03-14","ids":{"openalex":"https://openalex.org/W7138340886","doi":"https://doi.org/10.1609/aaai.v40i31.39788"},"language":null,"primary_location":{"id":"doi:10.1609/aaai.v40i31.39788","is_oa":true,"landing_page_url":"https://doi.org/10.1609/aaai.v40i31.39788","pdf_url":null,"source":{"id":"https://openalex.org/S4210191458","display_name":"Proceedings of the AAAI Conference on Artificial Intelligence","issn_l":"2159-5399","issn":["2159-5399","2374-3468"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/P4310320058","host_organization_name":"Association for the Advancement of Artificial Intelligence","host_organization_lineage":["https://openalex.org/P4310320058"],"host_organization_lineage_names":["Association for the Advancement of Artificial Intelligence"],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the AAAI Conference on Artificial Intelligence","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"diamond","oa_url":"https://doi.org/10.1609/aaai.v40i31.39788","any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":null,"display_name":"Baoliang Tian","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Baoliang Tian","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":null,"display_name":"Yuxuan Si","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yuxuan Si","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":null,"display_name":"Jilong Wang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Jilong Wang","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":null,"display_name":"LingYao Li","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"LingYao Li","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":null,"display_name":"Zhongyuan Bao","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhongyuan Bao","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":null,"display_name":"Zineng Zhou","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zineng Zhou","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":null,"display_name":"Tao Wang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Tao Wang","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":null,"display_name":"Sixu Li","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Sixu Li","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":null,"display_name":"Ziyao Xu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Ziyao Xu","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":null,"display_name":"Mingze Wang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Mingze Wang","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":null,"display_name":"Zhouzhuo Zhang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhouzhuo Zhang","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":null,"display_name":"Zhihao Wang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhihao Wang","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":null,"display_name":"Yi Ke Yun","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yi Ke Yun","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":null,"display_name":"Ke Tian","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Ke Tian","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":null,"display_name":"Ning Yang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Ning Yang","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":null,"display_name":"Minghui Qiu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Minghui Qiu","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":16,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.7826811,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":"40","issue":"31","first_page":"25887","last_page":"25895"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9465000033378601,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9465000033378601,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.026200000196695328,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12026","display_name":"Explainable Artificial Intelligence (XAI)","score":0.003599999938160181,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/visual-reasoning","display_name":"Visual reasoning","score":0.5648000240325928},{"id":"https://openalex.org/keywords/benchmark","display_name":"Benchmark (surveying)","score":0.5608000159263611},{"id":"https://openalex.org/keywords/inference","display_name":"Inference","score":0.5522000193595886},{"id":"https://openalex.org/keywords/bottleneck","display_name":"Bottleneck","score":0.5480999946594238},{"id":"https://openalex.org/keywords/pipeline","display_name":"Pipeline (software)","score":0.4733999967575073},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.4683000147342682},{"id":"https://openalex.org/keywords/semantics","display_name":"Semantics (computer science)","score":0.3391000032424927},{"id":"https://openalex.org/keywords/task-analysis","display_name":"Task analysis","score":0.30959999561309814}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7146000266075134},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6432999968528748},{"id":"https://openalex.org/C2777508537","wikidata":"https://www.wikidata.org/wiki/Q7936620","display_name":"Visual reasoning","level":2,"score":0.5648000240325928},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.5608000159263611},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.554099977016449},{"id":"https://openalex.org/C2776214188","wikidata":"https://www.wikidata.org/wiki/Q408386","display_name":"Inference","level":2,"score":0.5522000193595886},{"id":"https://openalex.org/C2780513914","wikidata":"https://www.wikidata.org/wiki/Q18210350","display_name":"Bottleneck","level":2,"score":0.5480999946594238},{"id":"https://openalex.org/C43521106","wikidata":"https://www.wikidata.org/wiki/Q2165493","display_name":"Pipeline (software)","level":2,"score":0.4733999967575073},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.4683000147342682},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.4027000069618225},{"id":"https://openalex.org/C184337299","wikidata":"https://www.wikidata.org/wiki/Q1437428","display_name":"Semantics (computer science)","level":2,"score":0.3391000032424927},{"id":"https://openalex.org/C175154964","wikidata":"https://www.wikidata.org/wiki/Q380077","display_name":"Task analysis","level":3,"score":0.30959999561309814},{"id":"https://openalex.org/C165064840","wikidata":"https://www.wikidata.org/wiki/Q1321061","display_name":"Matching (statistics)","level":2,"score":0.3077999949455261},{"id":"https://openalex.org/C2776321320","wikidata":"https://www.wikidata.org/wiki/Q857525","display_name":"Annotation","level":2,"score":0.28060001134872437},{"id":"https://openalex.org/C26760741","wikidata":"https://www.wikidata.org/wiki/Q160402","display_name":"Perception","level":2,"score":0.27869999408721924},{"id":"https://openalex.org/C63479239","wikidata":"https://www.wikidata.org/wiki/Q7353546","display_name":"Robustness (evolution)","level":3,"score":0.2766000032424927},{"id":"https://openalex.org/C83725634","wikidata":"https://www.wikidata.org/wiki/Q7268699","display_name":"Qualitative reasoning","level":2,"score":0.27459999918937683},{"id":"https://openalex.org/C4554734","wikidata":"https://www.wikidata.org/wiki/Q593744","display_name":"Knowledge base","level":2,"score":0.2705000042915344},{"id":"https://openalex.org/C138268822","wikidata":"https://www.wikidata.org/wiki/Q1051925","display_name":"Resolution (logic)","level":2,"score":0.26840001344680786},{"id":"https://openalex.org/C66024118","wikidata":"https://www.wikidata.org/wiki/Q1122506","display_name":"Computational model","level":2,"score":0.26420000195503235},{"id":"https://openalex.org/C116834253","wikidata":"https://www.wikidata.org/wiki/Q2039217","display_name":"Identification (biology)","level":2,"score":0.2565000057220459},{"id":"https://openalex.org/C98045186","wikidata":"https://www.wikidata.org/wiki/Q205663","display_name":"Process (computing)","level":2,"score":0.2524000108242035},{"id":"https://openalex.org/C36464697","wikidata":"https://www.wikidata.org/wiki/Q451553","display_name":"Visualization","level":2,"score":0.25200000405311584},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.250900000333786}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1609/aaai.v40i31.39788","is_oa":true,"landing_page_url":"https://doi.org/10.1609/aaai.v40i31.39788","pdf_url":null,"source":{"id":"https://openalex.org/S4210191458","display_name":"Proceedings of the AAAI Conference on Artificial Intelligence","issn_l":"2159-5399","issn":["2159-5399","2374-3468"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/P4310320058","host_organization_name":"Association for the Advancement of Artificial Intelligence","host_organization_lineage":["https://openalex.org/P4310320058"],"host_organization_lineage_names":["Association for the Advancement of Artificial Intelligence"],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the AAAI Conference on Artificial Intelligence","raw_type":"journal-article"}],"best_oa_location":{"id":"doi:10.1609/aaai.v40i31.39788","is_oa":true,"landing_page_url":"https://doi.org/10.1609/aaai.v40i31.39788","pdf_url":null,"source":{"id":"https://openalex.org/S4210191458","display_name":"Proceedings of the AAAI Conference on Artificial Intelligence","issn_l":"2159-5399","issn":["2159-5399","2374-3468"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/P4310320058","host_organization_name":"Association for the Advancement of Artificial Intelligence","host_organization_lineage":["https://openalex.org/P4310320058"],"host_organization_lineage_names":["Association for the Advancement of Artificial Intelligence"],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the AAAI Conference on Artificial Intelligence","raw_type":"journal-article"},"sustainable_development_goals":[{"display_name":"Quality Education","id":"https://metadata.un.org/sdg/4","score":0.4265836179256439}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Multimodal":[0],"Large":[1],"Language":[2],"Models":[3],"are":[4],"primarily":[5],"trained":[6],"and":[7,19,29,69,111,117,125,186,216],"evaluated":[8],"on":[9,145],"aligned":[10],"image-text":[11],"pairs,":[12],"which":[13],"leaves":[14],"their":[15],"ability":[16],"to":[17,36,107,137],"detect":[18],"resolve":[20],"real-world":[21,86],"inconsistencies":[22],"largely":[23],"unexplored.":[24],"In":[25],"open-domain":[26],"applications":[27],"visual":[28,201],"textual":[30],"cues":[31],"often":[32],"conflict,":[33],"requiring":[34,170],"models":[35,124,142,222],"perform":[37,143],"structured":[38],"reasoning":[39,67,198,215],"beyond":[40],"surface-level":[41],"alignment.":[42],"We":[43,119],"introduce":[44],"CrossCheck-Bench,":[45],"a":[46,59,97,127,210],"diagnostic":[47],"benchmark":[48,57],"for":[49,75,157,220],"evaluating":[50],"contradiction":[51,139],"detection":[52],"in":[53,168,213],"multimodal":[54,214],"inputs.":[55],"The":[56,92],"adopts":[58],"hierarchical":[60],"task":[61],"framework":[62],"covering":[63],"three":[64],"levels":[65],"of":[66,224],"complexity":[68],"defines":[70],"seven":[71],"atomic":[72],"capabilities":[73],"essential":[74],"resolving":[76],"cross-modal":[77,226],"inconsistencies.":[78],"CrossCheck-Bench":[79],"includes":[80],"15k":[81],"question-answer":[82],"pairs":[83],"sourced":[84],"from":[85,134],"artifacts":[87],"with":[88,199],"synthetically":[89],"injected":[90],"contradictions.":[91],"dataset":[93],"is":[94],"constructed":[95],"through":[96],"multi-stage":[98],"annotation":[99],"pipeline":[100],"involving":[101],"more":[102,204],"than":[103],"450":[104],"expert":[105],"hours":[106],"ensure":[108],"semantic":[109],"validity":[110],"calibrated":[112],"difficulty":[113],"across":[114],"perception,":[115],"integration,":[116],"reasoning.":[118,159],"evaluate":[120],"13":[121],"state-of-the-art":[122],"vision-language":[123],"observe":[126],"consistent":[128],"performance":[129],"drop":[130],"as":[131,184],"tasks":[132,169],"shift":[133],"perceptual":[135],"matching":[136],"logical":[138],"detection.":[140],"Most":[141],"well":[144],"isolated":[146],"entity":[147],"recognition":[148],"but":[149],"fail":[150],"when":[151],"multiple":[152],"clues":[153],"must":[154],"be":[155],"synthesized":[156],"conflict":[158],"Capability-level":[160],"analysis":[161],"further":[162],"reveals":[163],"uneven":[164],"skill":[165],"acquisition,":[166],"especially":[167],"multi-step":[171],"inference":[172],"or":[173],"rule-based":[174],"validation.":[175],"Additional":[176],"probing":[177],"shows":[178],"that":[179,195],"conventional":[180],"prompting":[181],"strategies":[182],"such":[183],"Chain-of-Thought":[185],"Set-of-Mark":[187],"yield":[188],"only":[189],"marginal":[190],"gains.":[191],"By":[192],"contrast,":[193],"methods":[194],"interleave":[196],"symbolic":[197],"grounded":[200],"processing":[202],"achieve":[203],"stable":[205],"improvements.":[206],"These":[207],"results":[208],"highlight":[209],"persistent":[211],"bottleneck":[212],"suggest":[217],"new":[218],"directions":[219],"building":[221],"capable":[223],"robust":[225],"verification.":[227]},"counts_by_year":[],"updated_date":"2026-03-18T14:38:29.013473","created_date":"2026-02-06T00:00:00"}
