{"id":"https://openalex.org/W4403792064","doi":"https://doi.org/10.1145/3664647.3680808","title":"Improving Composed Image Retrieval via Contrastive Learning with Scaling Positives and Negatives","display_name":"Improving Composed Image Retrieval via Contrastive Learning with Scaling Positives and Negatives","publication_year":2024,"publication_date":"2024-10-26","ids":{"openalex":"https://openalex.org/W4403792064","doi":"https://doi.org/10.1145/3664647.3680808"},"language":"en","primary_location":{"id":"doi:10.1145/3664647.3680808","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3664647.3680808","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 32nd ACM International Conference on Multimedia","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5071268429","display_name":"Zhangchi Feng","orcid":null},"institutions":[{"id":"https://openalex.org/I82880672","display_name":"Beihang University","ror":"https://ror.org/00wk2mp56","country_code":"CN","type":"education","lineage":["https://openalex.org/I82880672"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Zhangchi Feng","raw_affiliation_strings":["CCSE, School of Computer Science and Engineering, Beihang University, Beijing, China"],"affiliations":[{"raw_affiliation_string":"CCSE, School of Computer Science and Engineering, Beihang University, Beijing, China","institution_ids":["https://openalex.org/I82880672"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5027015677","display_name":"Richong Zhang","orcid":"https://orcid.org/0000-0002-1207-0300"},"institutions":[{"id":"https://openalex.org/I82880672","display_name":"Beihang University","ror":"https://ror.org/00wk2mp56","country_code":"CN","type":"education","lineage":["https://openalex.org/I82880672"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Richong Zhang","raw_affiliation_strings":["CCSE, School of Computer Science and Engineering, Beihang University, Beijing, China"],"affiliations":[{"raw_affiliation_string":"CCSE, School of Computer Science and Engineering, Beihang University, Beijing, China","institution_ids":["https://openalex.org/I82880672"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5043077962","display_name":"Zhijie Nie","orcid":"https://orcid.org/0009-0004-3933-0522"},"institutions":[{"id":"https://openalex.org/I82880672","display_name":"Beihang University","ror":"https://ror.org/00wk2mp56","country_code":"CN","type":"education","lineage":["https://openalex.org/I82880672"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Zhijie Nie","raw_affiliation_strings":["CCSE, School of Computer Science and Engineering, Beihang University, Beijing, China"],"affiliations":[{"raw_affiliation_string":"CCSE, School of Computer Science and Engineering, Beihang University, Beijing, China","institution_ids":["https://openalex.org/I82880672"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5071268429"],"corresponding_institution_ids":["https://openalex.org/I82880672"],"apc_list":null,"apc_paid":null,"fwci":2.0994,"has_fulltext":false,"cited_by_count":8,"citation_normalized_percentile":{"value":0.88817486,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":97,"max":99},"biblio":{"volume":null,"issue":null,"first_page":"1632","last_page":"1641"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10627","display_name":"Advanced Image and Video Retrieval Techniques","score":0.9994000196456909,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10627","display_name":"Advanced Image and Video Retrieval Techniques","score":0.9994000196456909,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11307","display_name":"Domain Adaptation and Few-Shot Learning","score":0.9991000294685364,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9984999895095825,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7586427927017212},{"id":"https://openalex.org/keywords/false-positive-paradox","display_name":"False positive paradox","score":0.6745466589927673},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5970830321311951},{"id":"https://openalex.org/keywords/true-positive-rate","display_name":"True positive rate","score":0.5614212155342102},{"id":"https://openalex.org/keywords/scaling","display_name":"Scaling","score":0.5558790564537048},{"id":"https://openalex.org/keywords/negative","display_name":"Negative","score":0.5496208071708679},{"id":"https://openalex.org/keywords/image-retrieval","display_name":"Image retrieval","score":0.49656373262405396},{"id":"https://openalex.org/keywords/image","display_name":"Image (mathematics)","score":0.45597824454307556},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.4496111571788788},{"id":"https://openalex.org/keywords/computer-vision","display_name":"Computer vision","score":0.41139256954193115},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.3524433672428131},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.1302456557750702},{"id":"https://openalex.org/keywords/optics","display_name":"Optics","score":0.06252256035804749},{"id":"https://openalex.org/keywords/physics","display_name":"Physics","score":0.05724486708641052}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7586427927017212},{"id":"https://openalex.org/C64869954","wikidata":"https://www.wikidata.org/wiki/Q1859747","display_name":"False positive paradox","level":2,"score":0.6745466589927673},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5970830321311951},{"id":"https://openalex.org/C2989486834","wikidata":"https://www.wikidata.org/wiki/Q3808900","display_name":"True positive rate","level":2,"score":0.5614212155342102},{"id":"https://openalex.org/C99844830","wikidata":"https://www.wikidata.org/wiki/Q102441924","display_name":"Scaling","level":2,"score":0.5558790564537048},{"id":"https://openalex.org/C113619468","wikidata":"https://www.wikidata.org/wiki/Q595597","display_name":"Negative","level":2,"score":0.5496208071708679},{"id":"https://openalex.org/C1667742","wikidata":"https://www.wikidata.org/wiki/Q10927554","display_name":"Image retrieval","level":3,"score":0.49656373262405396},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.45597824454307556},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.4496111571788788},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.41139256954193115},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.3524433672428131},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.1302456557750702},{"id":"https://openalex.org/C120665830","wikidata":"https://www.wikidata.org/wiki/Q14620","display_name":"Optics","level":1,"score":0.06252256035804749},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.05724486708641052},{"id":"https://openalex.org/C2524010","wikidata":"https://www.wikidata.org/wiki/Q8087","display_name":"Geometry","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3664647.3680808","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3664647.3680808","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 32nd ACM International Conference on Multimedia","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":27,"referenced_works":["https://openalex.org/W398859631","https://openalex.org/W1773149199","https://openalex.org/W2096733369","https://openalex.org/W2108598243","https://openalex.org/W2560674852","https://openalex.org/W2798991696","https://openalex.org/W2905544595","https://openalex.org/W3035524453","https://openalex.org/W3172514680","https://openalex.org/W3175684172","https://openalex.org/W3176909828","https://openalex.org/W3203247393","https://openalex.org/W4224929299","https://openalex.org/W4292828970","https://openalex.org/W4304091775","https://openalex.org/W4313855701","https://openalex.org/W4360103755","https://openalex.org/W4361806825","https://openalex.org/W4378505291","https://openalex.org/W4386071700","https://openalex.org/W4386076215","https://openalex.org/W4386273457","https://openalex.org/W4386290297","https://openalex.org/W4386501573","https://openalex.org/W4387560058","https://openalex.org/W4390873539","https://openalex.org/W4393149123"],"related_works":["https://openalex.org/W2027184711","https://openalex.org/W1557094818","https://openalex.org/W4287692494","https://openalex.org/W3129715955","https://openalex.org/W3047594718","https://openalex.org/W2137954799","https://openalex.org/W4242820967","https://openalex.org/W4255955278","https://openalex.org/W4237360519","https://openalex.org/W4399575965"],"abstract_inverted_index":{"The":[0,130],"Composed":[1],"Image":[2],"Retrieval":[3],"(CIR)":[4],"task":[5],"aims":[6],"to":[7,95,124,140,145],"retrieve":[8],"target":[9],"images":[10],"using":[11],"a":[12,17,21,84,90,108,191],"composed":[13,187],"query":[14],"consisting":[15],"of":[16,78,80,119,122],"reference":[18],"image":[19,188],"and":[20,39,138,156,166,168,175,201],"modified":[22],"text.":[23],"Advanced":[24],"methods":[25,59],"often":[26],"utilize":[27],"contrastive":[28],"learning":[29],"as":[30],"the":[31,43,67,72,76,126,196],"optimization":[32],"objective,":[33],"which":[34,65],"benefits":[35],"from":[36],"adequate":[37],"positive":[38,55],"negative":[40,63,68],"examples.":[41,56],"However,":[42],"triplet":[44],"for":[45,71,98,112,195],"CIR":[46,147,193],"incurs":[47],"high":[48],"manual":[49],"annotation":[50],"costs,":[51],"resulting":[52],"in":[53,185],"limited":[54],"Furthermore,":[57],"existing":[58,146],"commonly":[60],"use":[61],"in-batch":[62],"sampling,":[64],"reduces":[66],"number":[69],"available":[70],"model.":[73],"To":[74,100],"address":[75],"problem":[77],"lack":[79],"positives,":[81],"we":[82,106],"propose":[83],"data":[85,202],"generation":[86],"method":[87,162,181],"by":[88],"leveraging":[89],"multi-modal":[91],"large":[92],"language":[93],"model":[94],"construct":[96],"triplets":[97],"CIR.":[99],"introduce":[101],"more":[102],"negatives":[103,123,167],"during":[104],"fine-tuning,":[105],"design":[107],"two-stage":[109],"fine-tuning":[110],"framework":[111],"CIR,":[113],"whose":[114],"second":[115],"stage":[116],"introduces":[117],"plenty":[118],"static":[120],"representations":[121],"optimize":[125],"representation":[127],"space":[128],"rapidly.":[129],"above":[131],"two":[132],"improvements":[133],"can":[134],"be":[135,141],"effectively":[136,163],"stacked":[137],"designed":[139],"plug-and-play,":[142],"easily":[143],"applied":[144],"models":[148],"without":[149],"changing":[150],"their":[151],"original":[152],"architectures.":[153],"Extensive":[154],"experiments":[155],"ablation":[157],"analysis":[158],"demonstrate":[159],"that":[160],"our":[161,180],"scales":[164],"positives":[165],"achieves":[169],"state-of-the-art":[170],"results":[171],"on":[172],"both":[173],"FashionIQ":[174],"CIRR":[176],"datasets.":[177],"In":[178],"addition,":[179],"also":[182],"performs":[183],"well":[184],"zero-shot":[186],"retrieval,":[189],"providing":[190],"new":[192],"solution":[194],"low-resources":[197],"scenario.":[198],"Our":[199],"code":[200],"are":[203],"released":[204],"at":[205],"https://github.com/BUAADreamer/SPN4CIR.":[206]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":7}],"updated_date":"2026-02-26T08:16:20.718346","created_date":"2025-10-10T00:00:00"}
