{"id":"https://openalex.org/W7161336489","doi":"https://doi.org/10.48550/arxiv.2605.14787","title":"Do Composed Image Retrieval Benchmarks Require Multimodal Composition?","display_name":"Do Composed Image Retrieval Benchmarks Require Multimodal Composition?","publication_year":2026,"publication_date":"2026-05-14","ids":{"openalex":"https://openalex.org/W7161336489","doi":"https://doi.org/10.48550/arxiv.2605.14787"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2605.14787","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2605.14787","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2605.14787","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5079764541","display_name":"Matteo Attimonelli","orcid":"https://orcid.org/0009-0003-6600-1938"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Attimonelli, Matteo","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5136257488","display_name":"Alessandro De Bellis","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"De Bellis, Alessandro","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5080213009","display_name":"Aryo Pradipta Gema","orcid":"https://orcid.org/0009-0007-1163-3531"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Gema, Aryo Pradipta","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5136219683","display_name":"Rohit Saxena","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Saxena, Rohit","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5120051084","display_name":"Monica Sekoyan","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Sekoyan, Monica","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5068169387","display_name":"Wai-Chung Kwan","orcid":"https://orcid.org/0000-0002-2942-4208"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Kwan, Wai-Chung","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5029434859","display_name":"Claudio Pomo","orcid":"https://orcid.org/0000-0001-5206-3909"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Pomo, Claudio","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5010504829","display_name":"Alessandro Suglia","orcid":"https://orcid.org/0000-0002-3177-5197"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Suglia, Alessandro","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5136251791","display_name":"Dietmar Jannach","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Jannach, Dietmar","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5136273722","display_name":"Tommaso Di Noia","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Di Noia, Tommaso","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5136238433","display_name":"Pasquale Minervini","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Minervini, Pasquale","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":11,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10824","display_name":"Image Retrieval and Classification Techniques","score":0.6161999702453613,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10824","display_name":"Image Retrieval and Classification Techniques","score":0.6161999702453613,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.12620000541210175,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10627","display_name":"Advanced Image and Video Retrieval Techniques","score":0.1177000030875206,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.6554999947547913},{"id":"https://openalex.org/keywords/embedding","display_name":"Embedding","score":0.6136999726295471},{"id":"https://openalex.org/keywords/image-retrieval","display_name":"Image retrieval","score":0.613099992275238},{"id":"https://openalex.org/keywords/image","display_name":"Image (mathematics)","score":0.6108999848365784},{"id":"https://openalex.org/keywords/conflation","display_name":"Conflation","score":0.462799996137619},{"id":"https://openalex.org/keywords/modality","display_name":"Modality (human\u2013computer interaction)","score":0.4244000017642975},{"id":"https://openalex.org/keywords/baseline","display_name":"Baseline (sea)","score":0.365200012922287},{"id":"https://openalex.org/keywords/conjunction","display_name":"Conjunction (astronomy)","score":0.3425000011920929}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8230000138282776},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.6554999947547913},{"id":"https://openalex.org/C41608201","wikidata":"https://www.wikidata.org/wiki/Q980509","display_name":"Embedding","level":2,"score":0.6136999726295471},{"id":"https://openalex.org/C1667742","wikidata":"https://www.wikidata.org/wiki/Q10927554","display_name":"Image retrieval","level":3,"score":0.613099992275238},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.6108999848365784},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.5701000094413757},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4912000000476837},{"id":"https://openalex.org/C130440534","wikidata":"https://www.wikidata.org/wiki/Q14946528","display_name":"Conflation","level":2,"score":0.462799996137619},{"id":"https://openalex.org/C2780226545","wikidata":"https://www.wikidata.org/wiki/Q6888030","display_name":"Modality (human\u2013computer interaction)","level":2,"score":0.4244000017642975},{"id":"https://openalex.org/C12725497","wikidata":"https://www.wikidata.org/wiki/Q810247","display_name":"Baseline (sea)","level":2,"score":0.365200012922287},{"id":"https://openalex.org/C59656382","wikidata":"https://www.wikidata.org/wiki/Q191536","display_name":"Conjunction (astronomy)","level":2,"score":0.3425000011920929},{"id":"https://openalex.org/C189391414","wikidata":"https://www.wikidata.org/wiki/Q7936579","display_name":"Visual Word","level":4,"score":0.32820001244544983},{"id":"https://openalex.org/C61797465","wikidata":"https://www.wikidata.org/wiki/Q1188986","display_name":"Term (time)","level":2,"score":0.3075000047683716},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.3037000000476837},{"id":"https://openalex.org/C199579030","wikidata":"https://www.wikidata.org/wiki/Q2851778","display_name":"Automatic image annotation","level":4,"score":0.290800005197525},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.2906999886035919},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.28870001435279846},{"id":"https://openalex.org/C2780052074","wikidata":"https://www.wikidata.org/wiki/Q1128648","display_name":"Content-based image retrieval","level":4,"score":0.2833000123500824},{"id":"https://openalex.org/C149629883","wikidata":"https://www.wikidata.org/wiki/Q660926","display_name":"Fraction (chemistry)","level":2,"score":0.27570000290870667},{"id":"https://openalex.org/C175154964","wikidata":"https://www.wikidata.org/wiki/Q380077","display_name":"Task analysis","level":3,"score":0.267300009727478}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2605.14787","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2605.14787","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2605.14787","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2605.14787","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Composed":[0],"Image":[1],"Retrieval":[2],"(CIR)":[3],"is":[4,24,39],"a":[5,10,14,18,27,79,88,120,172],"multimodal":[6,43,111,187,209],"retrieval":[7,177],"task":[8],"where":[9],"query":[11],"consists":[12],"of":[13,82,140,205],"reference":[15,50],"image":[16,29,51],"and":[17,21,52,73,152,175,197],"textual":[19,53],"modification,":[20],"the":[22],"goal":[23],"to":[25,41,93,202],"retrieve":[26],"target":[28],"satisfying":[30],"both.":[31],"In":[32,55],"principle,":[33],"strong":[34],"performance":[35,102],"on":[36,136,157,186],"CIR":[37,71,101,192],"benchmarks":[38,72,193],"assumed":[40],"require":[42],"composition,":[44],"i.e.,":[45],"combining":[46,179],"complementary":[47],"information":[48,188],"from":[49,105],"modification.":[54],"this":[56,61,116,158],"work,":[57],"we":[58,118,124,132],"show":[59],"that":[60],"assumption":[62],"does":[63],"not":[64],"always":[65],"hold.":[66],"Across":[67],"four":[68],"widely":[69],"used":[70],"eleven":[74],"Generalist":[75],"Multimodal":[76],"Embedding":[77],"models,":[78],"large":[80],"fraction":[81],"queries":[83,127,165],"can":[84,103,166],"be":[85,169],"solved":[86,170],"using":[87],"single":[89,173],"modality":[90],"(from":[91],"32.2%":[92],"83.6%),":[94],"revealing":[95],"pervasive":[96],"unimodal":[97,106],"shortcuts.":[98],"Thus,":[99],"high":[100],"arise":[104],"signals":[107],"rather":[108],"than":[109],"true":[110],"composition.":[112,210],"To":[113],"better":[114],"understand":[115],"issue,":[117],"perform":[119],"two-stage":[121],"audit.":[122],"First,":[123],"identify":[125],"shortcut-solvable":[126],"through":[128],"cross-model":[129],"analysis.":[130],"Second,":[131],"conduct":[133],"human":[134],"validation":[135],"4,741":[137],"shortcut-free":[138],"queries,":[139,200],"which":[141],"only":[142],"1,689":[143],"are":[144],"well-formed,":[145],"with":[146,171],"common":[147],"issues":[148],"including":[149],"ambiguous":[150],"edits":[151],"mismatched":[153],"targets.":[154],"Re-evaluating":[155],"models":[156],"validated":[159],"subset":[160],"reveals":[161],"qualitatively":[162],"different":[163],"behaviour:":[164],"no":[167],"longer":[168],"modality,":[174],"successful":[176],"requires":[178],"both":[180],"inputs.":[181],"While":[182],"accuracy":[183],"decreases,":[184],"reliance":[185],"increases.":[189],"Overall,":[190],"current":[191],"conflate":[194],"shortcut-solvable,":[195],"noisy,":[196],"genuinely":[198],"compositional":[199],"leading":[201],"an":[203],"overestimation":[204],"model":[206],"capability":[207],"in":[208]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2026-05-16T00:00:00"}
