{"id":"https://openalex.org/W7124881369","doi":"https://doi.org/10.1109/cbmi66578.2025.11339315","title":"Vision Projector: Improving Zero-Shot Composed Image Retrieval at Inference","display_name":"Vision Projector: Improving Zero-Shot Composed Image Retrieval at Inference","publication_year":2025,"publication_date":"2025-10-22","ids":{"openalex":"https://openalex.org/W7124881369","doi":"https://doi.org/10.1109/cbmi66578.2025.11339315"},"language":null,"primary_location":{"id":"doi:10.1109/cbmi66578.2025.11339315","is_oa":false,"landing_page_url":"https://doi.org/10.1109/cbmi66578.2025.11339315","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 International Conference on Content-Based Multimedia Indexing (CBMI)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5064415860","display_name":"Hoang-Bao Le","orcid":null},"institutions":[{"id":"https://openalex.org/I42934936","display_name":"Dublin City University","ror":"https://ror.org/04a1a1e81","country_code":"IE","type":"education","lineage":["https://openalex.org/I42934936"]}],"countries":["IE"],"is_corresponding":true,"raw_author_name":"Hoang-Bao Le","raw_affiliation_strings":["Dublin City University,ADAPT Centre,Dublin,Ireland"],"affiliations":[{"raw_affiliation_string":"Dublin City University,ADAPT Centre,Dublin,Ireland","institution_ids":["https://openalex.org/I42934936"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5123427802","display_name":"Allie Tran","orcid":null},"institutions":[{"id":"https://openalex.org/I42934936","display_name":"Dublin City University","ror":"https://ror.org/04a1a1e81","country_code":"IE","type":"education","lineage":["https://openalex.org/I42934936"]}],"countries":["IE"],"is_corresponding":false,"raw_author_name":"Allie Tran","raw_affiliation_strings":["Dublin City University,ADAPT Centre,Dublin,Ireland"],"affiliations":[{"raw_affiliation_string":"Dublin City University,ADAPT Centre,Dublin,Ireland","institution_ids":["https://openalex.org/I42934936"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100387323","display_name":"Thanh Binh Nguyen","orcid":"https://orcid.org/0000-0002-2260-8186"},"institutions":[{"id":"https://openalex.org/I123565023","display_name":"Vietnam National University Ho Chi Minh City","ror":"https://ror.org/00waaqh38","country_code":"VN","type":"education","lineage":["https://openalex.org/I123565023"]}],"countries":["VN"],"is_corresponding":false,"raw_author_name":"Binh T. Nguyen","raw_affiliation_strings":["Ho Chi Minh University of Science, Vietnam National University,Ho Chi Minh City,Vietnam"],"affiliations":[{"raw_affiliation_string":"Ho Chi Minh University of Science, Vietnam National University,Ho Chi Minh City,Vietnam","institution_ids":["https://openalex.org/I123565023"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5071890018","display_name":"Liting Zhou","orcid":"https://orcid.org/0000-0002-7778-8743"},"institutions":[{"id":"https://openalex.org/I42934936","display_name":"Dublin City University","ror":"https://ror.org/04a1a1e81","country_code":"IE","type":"education","lineage":["https://openalex.org/I42934936"]}],"countries":["IE"],"is_corresponding":false,"raw_author_name":"Liting Zhou","raw_affiliation_strings":["Dublin City University,ADAPT Centre,Dublin,Ireland"],"affiliations":[{"raw_affiliation_string":"Dublin City University,ADAPT Centre,Dublin,Ireland","institution_ids":["https://openalex.org/I42934936"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5095025991","display_name":"Cathal Gurrin","orcid":null},"institutions":[{"id":"https://openalex.org/I42934936","display_name":"Dublin City University","ror":"https://ror.org/04a1a1e81","country_code":"IE","type":"education","lineage":["https://openalex.org/I42934936"]}],"countries":["IE"],"is_corresponding":false,"raw_author_name":"Cathal Gurrin","raw_affiliation_strings":["Dublin City University,ADAPT Centre,Dublin,Ireland"],"affiliations":[{"raw_affiliation_string":"Dublin City University,ADAPT Centre,Dublin,Ireland","institution_ids":["https://openalex.org/I42934936"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5064415860"],"corresponding_institution_ids":["https://openalex.org/I42934936"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.67094586,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"5"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.7716000080108643,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.7716000080108643,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10627","display_name":"Advanced Image and Video Retrieval Techniques","score":0.121799997985363,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10824","display_name":"Image Retrieval and Classification Techniques","score":0.05480000004172325,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/image-retrieval","display_name":"Image retrieval","score":0.6841999888420105},{"id":"https://openalex.org/keywords/inference","display_name":"Inference","score":0.5160999894142151},{"id":"https://openalex.org/keywords/encoder","display_name":"Encoder","score":0.4758000075817108},{"id":"https://openalex.org/keywords/visual-word","display_name":"Visual Word","score":0.4442000091075897},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.3792000114917755},{"id":"https://openalex.org/keywords/image","display_name":"Image (mathematics)","score":0.37790000438690186},{"id":"https://openalex.org/keywords/image-matching","display_name":"Image matching","score":0.3765000104904175},{"id":"https://openalex.org/keywords/matching","display_name":"Matching (statistics)","score":0.37610000371932983}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7795000076293945},{"id":"https://openalex.org/C1667742","wikidata":"https://www.wikidata.org/wiki/Q10927554","display_name":"Image retrieval","level":3,"score":0.6841999888420105},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.671500027179718},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.5565999746322632},{"id":"https://openalex.org/C2776214188","wikidata":"https://www.wikidata.org/wiki/Q408386","display_name":"Inference","level":2,"score":0.5160999894142151},{"id":"https://openalex.org/C118505674","wikidata":"https://www.wikidata.org/wiki/Q42586063","display_name":"Encoder","level":2,"score":0.4758000075817108},{"id":"https://openalex.org/C189391414","wikidata":"https://www.wikidata.org/wiki/Q7936579","display_name":"Visual Word","level":4,"score":0.4442000091075897},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.3792000114917755},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.37790000438690186},{"id":"https://openalex.org/C2986492983","wikidata":"https://www.wikidata.org/wiki/Q861092","display_name":"Image matching","level":3,"score":0.3765000104904175},{"id":"https://openalex.org/C165064840","wikidata":"https://www.wikidata.org/wiki/Q1321061","display_name":"Matching (statistics)","level":2,"score":0.37610000371932983},{"id":"https://openalex.org/C66322947","wikidata":"https://www.wikidata.org/wiki/Q11658","display_name":"Transformer","level":3,"score":0.3596999943256378},{"id":"https://openalex.org/C2776760102","wikidata":"https://www.wikidata.org/wiki/Q5139990","display_name":"Code (set theory)","level":3,"score":0.337799996137619},{"id":"https://openalex.org/C92423082","wikidata":"https://www.wikidata.org/wiki/Q132146","display_name":"Zernike polynomials","level":3,"score":0.33709999918937683},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.33340001106262207},{"id":"https://openalex.org/C199579030","wikidata":"https://www.wikidata.org/wiki/Q2851778","display_name":"Automatic image annotation","level":4,"score":0.30309998989105225},{"id":"https://openalex.org/C9417928","wikidata":"https://www.wikidata.org/wiki/Q1070689","display_name":"Image processing","level":3,"score":0.2994999885559082},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.29280000925064087},{"id":"https://openalex.org/C52622490","wikidata":"https://www.wikidata.org/wiki/Q1026626","display_name":"Feature extraction","level":2,"score":0.28850001096725464},{"id":"https://openalex.org/C63479239","wikidata":"https://www.wikidata.org/wiki/Q7353546","display_name":"Robustness (evolution)","level":3,"score":0.2712000012397766},{"id":"https://openalex.org/C175154964","wikidata":"https://www.wikidata.org/wiki/Q380077","display_name":"Task analysis","level":3,"score":0.258899986743927},{"id":"https://openalex.org/C44291984","wikidata":"https://www.wikidata.org/wiki/Q1074173","display_name":"Question answering","level":2,"score":0.25600001215934753},{"id":"https://openalex.org/C2776865275","wikidata":"https://www.wikidata.org/wiki/Q311666","display_name":"Projector","level":2,"score":0.25360000133514404}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/cbmi66578.2025.11339315","is_oa":false,"landing_page_url":"https://doi.org/10.1109/cbmi66578.2025.11339315","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 International Conference on Content-Based Multimedia Indexing (CBMI)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/4","display_name":"Quality Education","score":0.5548578500747681}],"awards":[{"id":"https://openalex.org/G2792121568","display_name":null,"funder_award_id":"SFI/13/RC/2106_P2,18/CRT/6223","funder_id":"https://openalex.org/F4320320847","funder_display_name":"Science Foundation Ireland"}],"funders":[{"id":"https://openalex.org/F4320320847","display_name":"Science Foundation Ireland","ror":"https://ror.org/0271asj38"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":18,"referenced_works":["https://openalex.org/W2886641317","https://openalex.org/W2905544595","https://openalex.org/W2963530300","https://openalex.org/W2964211610","https://openalex.org/W3094502228","https://openalex.org/W3203247393","https://openalex.org/W4312735840","https://openalex.org/W4312825288","https://openalex.org/W4386071700","https://openalex.org/W4389520168","https://openalex.org/W4390873539","https://openalex.org/W4390889743","https://openalex.org/W4400526206","https://openalex.org/W4400530983","https://openalex.org/W4402727158","https://openalex.org/W4402727764","https://openalex.org/W4402753980","https://openalex.org/W4412722575"],"related_works":[],"abstract_inverted_index":{"Composed":[0],"Image":[1],"Retrieval":[2],"(CIR)":[3],"involves":[4],"retrieving":[5],"a":[6,11,15,19,41,44,49],"target":[7],"image":[8,17],"based":[9],"on":[10,112],"query":[12],"composed":[13],"of":[14,58],"reference":[16],"and":[18,104],"textual":[20],"modification.":[21],"Zero-Shot":[22],"CIR":[23],"extends":[24],"this":[25],"task":[26],"by":[27,48,110],"removing":[28],"the":[29,80],"need":[30],"for":[31],"labeled":[32],"triplets":[33],"during":[34],"training.":[35],"Most":[36],"state-of-the-art":[37],"(SOTA)":[38],"methods":[39],"share":[40],"common":[42],"structure:":[43],"vision-language":[45],"encoder":[46],"followed":[47],"matching":[50],"module":[51,86],"using":[52,116],"Transformers":[53],"or":[54,61],"contrastive":[55],"learning.":[56],"Instead":[57],"increasing":[59],"data":[60],"model":[62],"complexity,":[63],"we":[64,68,78],"wonder":[65],"that:":[66],"Can":[67],"improve":[69],"retrieval":[70],"performance":[71,100],"at":[72],"inference":[73],"time?":[74],"To":[75],"answer":[76],"this,":[77],"propose":[79],"Vision":[81],"Projector":[82],"(VP)-a":[83],"lightweight,":[84],"plug-and-play":[85],"that":[87],"enhances":[88],"visual":[89],"representations":[90],"without":[91],"retraining.":[92],"Integrated":[93],"directly":[94],"into":[95],"MagicLens,":[96],"VP":[97],"consistently":[98],"improves":[99],"across":[101],"CIRR,":[102],"FashionIQ,":[103],"CIRCO.":[105],"Notably,":[106],"it":[107],"boosts":[108],"MagicLens":[109],"18%":[111],"CIRCO,":[113],"despite":[114],"not":[115],"its":[117],"strongest":[118],"variant.":[119],"Code":[120],"is":[121],"available":[122],"at:":[123],"https://github.com/baohl00/VisionProjector_ZSCIR.":[124]},"counts_by_year":[],"updated_date":"2026-04-09T08:11:56.329763","created_date":"2026-01-21T00:00:00"}
