{"id":"https://openalex.org/W7164826998","doi":"https://doi.org/10.1145/3805622.3810768","title":"Describing-Verifying-Scoring: A Hierarchical Reasoning Framework for Zero-Shot Composed Image Retrieval","display_name":"Describing-Verifying-Scoring: A Hierarchical Reasoning Framework for Zero-Shot Composed Image Retrieval","publication_year":2026,"publication_date":"2026-06-15","ids":{"openalex":"https://openalex.org/W7164826998","doi":"https://doi.org/10.1145/3805622.3810768"},"language":null,"primary_location":{"id":"doi:10.1145/3805622.3810768","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3805622.3810768","pdf_url":null,"source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2026 International Conference on Multimedia Retrieval","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://doi.org/10.1145/3805622.3810768","any_repository_has_fulltext":null},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5111209168","display_name":"Guquan Jing","orcid":null},"institutions":[{"id":"https://openalex.org/I141568987","display_name":"Hong Kong Baptist University","ror":"https://ror.org/0145fw131","country_code":"HK","type":"education","lineage":["https://openalex.org/I141568987"]}],"countries":["HK"],"is_corresponding":false,"raw_author_name":"Guquan Jing","raw_affiliation_strings":["Beijing Normal-Hong Kong Baptist University, Zhuhai, China and Hong Kong Baptist University, Hong Kong, China"],"raw_orcid":"https://orcid.org/0009-0007-4709-9932","affiliations":[{"raw_affiliation_string":"Beijing Normal-Hong Kong Baptist University, Zhuhai, China and Hong Kong Baptist University, Hong Kong, China","institution_ids":["https://openalex.org/I141568987"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5073018274","display_name":"Peng Gao","orcid":"https://orcid.org/0009-0000-0848-9814"},"institutions":[{"id":"https://openalex.org/I141568987","display_name":"Hong Kong Baptist University","ror":"https://ror.org/0145fw131","country_code":"HK","type":"education","lineage":["https://openalex.org/I141568987"]}],"countries":["HK"],"is_corresponding":false,"raw_author_name":"Peng Gao","raw_affiliation_strings":["Beijing Normal-Hong Kong Baptist University, Zhuhai, China and Hong Kong Baptist University, Hong Kong, China"],"raw_orcid":"https://orcid.org/0009-0000-0848-9814","affiliations":[{"raw_affiliation_string":"Beijing Normal-Hong Kong Baptist University, Zhuhai, China and Hong Kong Baptist University, Hong Kong, China","institution_ids":["https://openalex.org/I141568987"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5109692762","display_name":"Yujian Lee","orcid":"https://orcid.org/0009-0003-2514-3913"},"institutions":[{"id":"https://openalex.org/I141568987","display_name":"Hong Kong Baptist University","ror":"https://ror.org/0145fw131","country_code":"HK","type":"education","lineage":["https://openalex.org/I141568987"]}],"countries":["HK"],"is_corresponding":false,"raw_author_name":"Yujian Lee","raw_affiliation_strings":["Beijing Normal-Hong Kong Baptist University, Zhuhai, China and Hong Kong Baptist University, Hong Kong, China"],"raw_orcid":"https://orcid.org/0009-0003-2514-3913","affiliations":[{"raw_affiliation_string":"Beijing Normal-Hong Kong Baptist University, Zhuhai, China and Hong Kong Baptist University, Hong Kong, China","institution_ids":["https://openalex.org/I141568987"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5100341698","display_name":"Hui Zhang","orcid":"https://orcid.org/0000-0002-1681-7926"},"institutions":[{"id":"https://openalex.org/I141568987","display_name":"Hong Kong Baptist University","ror":"https://ror.org/0145fw131","country_code":"HK","type":"education","lineage":["https://openalex.org/I141568987"]}],"countries":["HK"],"is_corresponding":false,"raw_author_name":"Hui Zhang","raw_affiliation_strings":["Beijing Normal-Hong Kong Baptist University, Zhuhai, China"],"raw_orcid":"https://orcid.org/0000-0002-1681-7926","affiliations":[{"raw_affiliation_string":"Beijing Normal-Hong Kong Baptist University, Zhuhai, China","institution_ids":["https://openalex.org/I141568987"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.93610381,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"21","last_page":"30"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9470000267028809,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9470000267028809,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10627","display_name":"Advanced Image and Video Retrieval Techniques","score":0.02239999920129776,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10824","display_name":"Image Retrieval and Classification Techniques","score":0.009399999864399433,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/leverage","display_name":"Leverage (statistics)","score":0.6746000051498413},{"id":"https://openalex.org/keywords/pipeline","display_name":"Pipeline (software)","score":0.5665000081062317},{"id":"https://openalex.org/keywords/image-retrieval","display_name":"Image retrieval","score":0.5623000264167786},{"id":"https://openalex.org/keywords/process","display_name":"Process (computing)","score":0.4323999881744385},{"id":"https://openalex.org/keywords/hierarchical-database-model","display_name":"Hierarchical database model","score":0.4072999954223633},{"id":"https://openalex.org/keywords/image","display_name":"Image (mathematics)","score":0.38499999046325684}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7807000279426575},{"id":"https://openalex.org/C153083717","wikidata":"https://www.wikidata.org/wiki/Q6535263","display_name":"Leverage (statistics)","level":2,"score":0.6746000051498413},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6201000213623047},{"id":"https://openalex.org/C43521106","wikidata":"https://www.wikidata.org/wiki/Q2165493","display_name":"Pipeline (software)","level":2,"score":0.5665000081062317},{"id":"https://openalex.org/C1667742","wikidata":"https://www.wikidata.org/wiki/Q10927554","display_name":"Image retrieval","level":3,"score":0.5623000264167786},{"id":"https://openalex.org/C98045186","wikidata":"https://www.wikidata.org/wiki/Q205663","display_name":"Process (computing)","level":2,"score":0.4323999881744385},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.40849998593330383},{"id":"https://openalex.org/C144986985","wikidata":"https://www.wikidata.org/wiki/Q871236","display_name":"Hierarchical database model","level":2,"score":0.4072999954223633},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.38499999046325684},{"id":"https://openalex.org/C26760741","wikidata":"https://www.wikidata.org/wiki/Q160402","display_name":"Perception","level":2,"score":0.3537999987602234},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.3513999879360199},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.32100000977516174},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.31119999289512634},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.287200003862381},{"id":"https://openalex.org/C175154964","wikidata":"https://www.wikidata.org/wiki/Q380077","display_name":"Task analysis","level":3,"score":0.2685999870300293},{"id":"https://openalex.org/C44291984","wikidata":"https://www.wikidata.org/wiki/Q1074173","display_name":"Question answering","level":2,"score":0.2676999866962433},{"id":"https://openalex.org/C63479239","wikidata":"https://www.wikidata.org/wiki/Q7353546","display_name":"Robustness (evolution)","level":3,"score":0.25279998779296875},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.2500999867916107}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3805622.3810768","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3805622.3810768","pdf_url":null,"source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2026 International Conference on Multimedia Retrieval","raw_type":"proceedings-article"}],"best_oa_location":{"id":"doi:10.1145/3805622.3810768","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3805622.3810768","pdf_url":null,"source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2026 International Conference on Multimedia Retrieval","raw_type":"proceedings-article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":30,"referenced_works":["https://openalex.org/W2905544595","https://openalex.org/W3026458074","https://openalex.org/W3172514680","https://openalex.org/W3175684172","https://openalex.org/W3203247393","https://openalex.org/W4287891464","https://openalex.org/W4292828970","https://openalex.org/W4312825288","https://openalex.org/W4386065506","https://openalex.org/W4386071700","https://openalex.org/W4390873539","https://openalex.org/W4393159265","https://openalex.org/W4400530983","https://openalex.org/W4401044046","https://openalex.org/W4401996408","https://openalex.org/W4402716416","https://openalex.org/W4402727158","https://openalex.org/W4403792412","https://openalex.org/W4408353327","https://openalex.org/W4411631881","https://openalex.org/W4413145898","https://openalex.org/W4413146297","https://openalex.org/W4413146661","https://openalex.org/W4413147773","https://openalex.org/W4415708520","https://openalex.org/W7133196460","https://openalex.org/W7133224126","https://openalex.org/W7133231369","https://openalex.org/W7138193722","https://openalex.org/W7160028320"],"related_works":[],"abstract_inverted_index":{"Zero-Shot":[0],"Composed":[1],"Image":[2],"Retrieval":[3],"(ZS-CIR)":[4],"aims":[5],"to":[6,52,92,103,125],"identify":[7,104],"target":[8],"images":[9],"using":[10],"a":[11,15,63,68,85,100,111,142],"composed":[12],"query":[13],"of":[14,145],"reference":[16],"image":[17],"and":[18,48],"modification":[19],"text":[20],"without":[21],"labeled":[22],"triplets.":[23],"While":[24],"recent":[25],"advances":[26],"leverage":[27],"Multimodal":[28],"Large":[29],"Language":[30],"Models":[31],"(MLLMs)":[32],"for":[33,117],"intent":[34],"reasoning,":[35],"they":[36],"often":[37],"suffer":[38],"from":[39,122],"hallucination-induced":[40],"inaccuracies":[41],"where":[42],"misaligned":[43],"descriptions":[44],"degrade":[45],"retrieval":[46,147],"reliability,":[47],"insufficient":[49],"reasoning":[50,128],"due":[51],"shallow":[53],"prompting":[54],"strategies.":[55],"To":[56],"address":[57],"these":[58],"challenges,":[59],"we":[60],"propose":[61],"DVSCIR,":[62],"novel":[64],"training-free":[65],"framework":[66],"featuring":[67],"hierarchical":[69,112,143],"Describing-Verifying-Scoring":[70],"pipeline":[71],"with":[72],"MLLM.":[73],"Specifically,":[74],"the":[75,105,120,146],"Describing":[76],"stage":[77,87,98],"generates":[78],"an":[79],"initial":[80],"candidate":[81],"caption,":[82],"followed":[83],"by":[84],"Verifying":[86],"that":[88,155],"rectifies":[89],"potential":[90],"hallucinations":[91],"ensure":[93],"description":[94],"accuracy.":[95],"The":[96],"Scoring":[97],"performs":[99],"fine-grained":[101],"re-ranking":[102],"optimal":[106],"match.":[107],"Within":[108],"each":[109],"stage,":[110],"Chain-of-Thought":[113],"(CoT)":[114],"process":[115],"tailored":[116],"ZS-CIR":[118],"guides":[119],"MLLM":[121],"low-level":[123],"perception":[124],"deep":[126],"intentional":[127],"via":[129],"sequential":[130],"steps":[131],"within":[132],"structured":[133],"sections.":[134],"This":[135],"progression":[136],"ensures":[137],"robust":[138],"cross-modal":[139],"correspondence":[140],"through":[141],"refinement":[144],"process.":[148],"Extensive":[149],"experiments":[150],"across":[151],"four":[152],"benchmarks":[153],"demonstrate":[154],"DVSCIR":[156],"achieves":[157],"state-of-the-art":[158],"performance,":[159],"validating":[160],"its":[161],"effectiveness":[162],"in":[163],"ZS-CIR.":[164]},"counts_by_year":[],"updated_date":"2026-06-16T07:37:23.134862","created_date":"2026-06-16T00:00:00"}
