{"id":"https://openalex.org/W4415540629","doi":"https://doi.org/10.1145/3746027.3754713","title":"Dual Prompt Learning for Adapting Vision-Language Models to Downstream Image-Text Retrieval","display_name":"Dual Prompt Learning for Adapting Vision-Language Models to Downstream Image-Text Retrieval","publication_year":2025,"publication_date":"2025-10-25","ids":{"openalex":"https://openalex.org/W4415540629","doi":"https://doi.org/10.1145/3746027.3754713"},"language":"en","primary_location":{"id":"doi:10.1145/3746027.3754713","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3746027.3754713","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 33rd ACM International Conference on Multimedia","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5111299488","display_name":"Yifan Wang","orcid":"https://orcid.org/0009-0004-5607-3265"},"institutions":[{"id":"https://openalex.org/I4210125143","display_name":"Chengdu University","ror":"https://ror.org/034z67559","country_code":"CN","type":"education","lineage":["https://openalex.org/I4210125143"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Yifan Wang","raw_affiliation_strings":["College of Computer Science, Sichuan University, Chengdu, China and Engineering Research Center of Machine Learning and Industry Intelligence, Ministry of Education, Chengdu, China"],"affiliations":[{"raw_affiliation_string":"College of Computer Science, Sichuan University, Chengdu, China and Engineering Research Center of Machine Learning and Industry Intelligence, Ministry of Education, Chengdu, China","institution_ids":["https://openalex.org/I4210125143"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100453495","display_name":"Tao Wang","orcid":"https://orcid.org/0000-0002-2480-878X"},"institutions":[{"id":"https://openalex.org/I4210125143","display_name":"Chengdu University","ror":"https://ror.org/034z67559","country_code":"CN","type":"education","lineage":["https://openalex.org/I4210125143"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Tao Wang","raw_affiliation_strings":["College of Computer Science, Sichuan University, Chengdu, China and Engineering Research Center of Machine Learning and Industry Intelligence, Ministry of Education, Chengdu, China"],"affiliations":[{"raw_affiliation_string":"College of Computer Science, Sichuan University, Chengdu, China and Engineering Research Center of Machine Learning and Industry Intelligence, Ministry of Education, Chengdu, China","institution_ids":["https://openalex.org/I4210125143"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5024548751","display_name":"Chenwei Tang","orcid":"https://orcid.org/0000-0002-1749-986X"},"institutions":[{"id":"https://openalex.org/I4210125143","display_name":"Chengdu University","ror":"https://ror.org/034z67559","country_code":"CN","type":"education","lineage":["https://openalex.org/I4210125143"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Chenwei Tang","raw_affiliation_strings":["College of Computer Science, Sichuan University, Chengdu, China and Engineering Research Center of Machine Learning and Industry Intelligence, Ministry of Education, Chengdu, China"],"affiliations":[{"raw_affiliation_string":"College of Computer Science, Sichuan University, Chengdu, China and Engineering Research Center of Machine Learning and Industry Intelligence, Ministry of Education, Chengdu, China","institution_ids":["https://openalex.org/I4210125143"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5026439520","display_name":"Caiyang Yu","orcid":"https://orcid.org/0000-0001-8246-1561"},"institutions":[{"id":"https://openalex.org/I4210125143","display_name":"Chengdu University","ror":"https://ror.org/034z67559","country_code":"CN","type":"education","lineage":["https://openalex.org/I4210125143"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Caiyang Yu","raw_affiliation_strings":["College of Computer Science, Sichuan University, Chengdu, China and Engineering Research Center of Machine Learning and Industry Intelligence, Ministry of Education, Chengdu, China"],"affiliations":[{"raw_affiliation_string":"College of Computer Science, Sichuan University, Chengdu, China and Engineering Research Center of Machine Learning and Industry Intelligence, Ministry of Education, Chengdu, China","institution_ids":["https://openalex.org/I4210125143"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5114091239","display_name":"Zhengqing Zang","orcid":null},"institutions":[{"id":"https://openalex.org/I4210125143","display_name":"Chengdu University","ror":"https://ror.org/034z67559","country_code":"CN","type":"education","lineage":["https://openalex.org/I4210125143"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Zhengqing Zang","raw_affiliation_strings":["College of Computer Science, Sichuan University, Chengdu, China and Engineering Research Center of Machine Learning and Industry Intelligence, Ministry of Education, Chengdu, China"],"affiliations":[{"raw_affiliation_string":"College of Computer Science, Sichuan University, Chengdu, China and Engineering Research Center of Machine Learning and Industry Intelligence, Ministry of Education, Chengdu, China","institution_ids":["https://openalex.org/I4210125143"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5043013159","display_name":"Mengmi Zhang","orcid":"https://orcid.org/0000-0002-2694-7097"},"institutions":[{"id":"https://openalex.org/I115228651","display_name":"Agency for Science, Technology and Research","ror":"https://ror.org/036wvzt09","country_code":"SG","type":"government","lineage":["https://openalex.org/I115228651"]},{"id":"https://openalex.org/I172675005","display_name":"Nanyang Technological University","ror":"https://ror.org/02e7b5302","country_code":"SG","type":"education","lineage":["https://openalex.org/I172675005"]}],"countries":["SG"],"is_corresponding":false,"raw_author_name":"Mengmi Zhang","raw_affiliation_strings":["College of Computing and Data Science, Nanyang Technological University, Singapore, Singapore and Deep NeuroCognition Lab, I2R and CFAR, Agency for Science, Technology and Research, Singapore, Singapore"],"affiliations":[{"raw_affiliation_string":"College of Computing and Data Science, Nanyang Technological University, Singapore, Singapore and Deep NeuroCognition Lab, I2R and CFAR, Agency for Science, Technology and Research, Singapore, Singapore","institution_ids":["https://openalex.org/I115228651","https://openalex.org/I172675005"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5026312781","display_name":"Shudong Huang","orcid":"https://orcid.org/0000-0001-6848-5460"},"institutions":[{"id":"https://openalex.org/I4210125143","display_name":"Chengdu University","ror":"https://ror.org/034z67559","country_code":"CN","type":"education","lineage":["https://openalex.org/I4210125143"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Shudong Huang","raw_affiliation_strings":["College of Computer Science, Sichuan University, Chengdu, China and Engineering Research Center of Machine Learning and Industry Intelligence, Ministry of Education, Chengdu, China"],"affiliations":[{"raw_affiliation_string":"College of Computer Science, Sichuan University, Chengdu, China and Engineering Research Center of Machine Learning and Industry Intelligence, Ministry of Education, Chengdu, China","institution_ids":["https://openalex.org/I4210125143"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5073535763","display_name":"Jiancheng Lv","orcid":"https://orcid.org/0000-0001-6551-3884"},"institutions":[{"id":"https://openalex.org/I4210125143","display_name":"Chengdu University","ror":"https://ror.org/034z67559","country_code":"CN","type":"education","lineage":["https://openalex.org/I4210125143"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jiancheng Lv","raw_affiliation_strings":["College of Computer Science, Sichuan University, Chengdu, China and Engineering Research Center of Machine Learning and Industry Intelligence, Ministry of Education, Chengdu, China"],"affiliations":[{"raw_affiliation_string":"College of Computer Science, Sichuan University, Chengdu, China and Engineering Research Center of Machine Learning and Industry Intelligence, Ministry of Education, Chengdu, China","institution_ids":["https://openalex.org/I4210125143"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":8,"corresponding_author_ids":["https://openalex.org/A5111299488"],"corresponding_institution_ids":["https://openalex.org/I4210125143"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.3077661,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"5922","last_page":"5931"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10627","display_name":"Advanced Image and Video Retrieval Techniques","score":0.9983000159263611,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11307","display_name":"Domain Adaptation and Few-Shot Learning","score":0.9961000084877014,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/downstream","display_name":"Downstream (manufacturing)","score":0.8453999757766724},{"id":"https://openalex.org/keywords/subcategory","display_name":"Subcategory","score":0.6139000058174133},{"id":"https://openalex.org/keywords/benchmark","display_name":"Benchmark (surveying)","score":0.49779999256134033},{"id":"https://openalex.org/keywords/weighting","display_name":"Weighting","score":0.48559999465942383},{"id":"https://openalex.org/keywords/dual","display_name":"Dual (grammatical number)","score":0.45489999651908875},{"id":"https://openalex.org/keywords/labeled-data","display_name":"Labeled data","score":0.44760000705718994},{"id":"https://openalex.org/keywords/representation","display_name":"Representation (politics)","score":0.4146000146865845},{"id":"https://openalex.org/keywords/feature-learning","display_name":"Feature learning","score":0.4117000102996826}],"concepts":[{"id":"https://openalex.org/C2776207758","wikidata":"https://www.wikidata.org/wiki/Q5303302","display_name":"Downstream (manufacturing)","level":2,"score":0.8453999757766724},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.788100004196167},{"id":"https://openalex.org/C2780617661","wikidata":"https://www.wikidata.org/wiki/Q541563","display_name":"Subcategory","level":2,"score":0.6139000058174133},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5144000053405762},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.49779999256134033},{"id":"https://openalex.org/C183115368","wikidata":"https://www.wikidata.org/wiki/Q856577","display_name":"Weighting","level":2,"score":0.48559999465942383},{"id":"https://openalex.org/C2780980858","wikidata":"https://www.wikidata.org/wiki/Q110022","display_name":"Dual (grammatical number)","level":2,"score":0.45489999651908875},{"id":"https://openalex.org/C2776145971","wikidata":"https://www.wikidata.org/wiki/Q30673951","display_name":"Labeled data","level":2,"score":0.44760000705718994},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.43230000138282776},{"id":"https://openalex.org/C2776359362","wikidata":"https://www.wikidata.org/wiki/Q2145286","display_name":"Representation (politics)","level":3,"score":0.4146000146865845},{"id":"https://openalex.org/C59404180","wikidata":"https://www.wikidata.org/wiki/Q17013334","display_name":"Feature learning","level":2,"score":0.4117000102996826},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.4090999960899353},{"id":"https://openalex.org/C2780801425","wikidata":"https://www.wikidata.org/wiki/Q5164392","display_name":"Construct (python library)","level":2,"score":0.38499999046325684},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.3702000081539154},{"id":"https://openalex.org/C184337299","wikidata":"https://www.wikidata.org/wiki/Q1437428","display_name":"Semantics (computer science)","level":2,"score":0.32739999890327454},{"id":"https://openalex.org/C171268870","wikidata":"https://www.wikidata.org/wiki/Q1486676","display_name":"GRASP","level":2,"score":0.3158999979496002},{"id":"https://openalex.org/C152139883","wikidata":"https://www.wikidata.org/wiki/Q252973","display_name":"Mutual information","level":2,"score":0.3000999987125397},{"id":"https://openalex.org/C2780009758","wikidata":"https://www.wikidata.org/wiki/Q6804172","display_name":"Measure (data warehouse)","level":2,"score":0.2892000079154968},{"id":"https://openalex.org/C28006648","wikidata":"https://www.wikidata.org/wiki/Q6934509","display_name":"Multi-task learning","level":3,"score":0.2784999907016754},{"id":"https://openalex.org/C176217482","wikidata":"https://www.wikidata.org/wiki/Q860554","display_name":"Metric (unit)","level":2,"score":0.2574000060558319},{"id":"https://openalex.org/C66746571","wikidata":"https://www.wikidata.org/wiki/Q1134833","display_name":"ENCODE","level":3,"score":0.25459998846054077}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1145/3746027.3754713","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3746027.3754713","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 33rd ACM International Conference on Multimedia","raw_type":"proceedings-article"},{"id":"pmh:oai:dr.ntu.edu.sg:10356/202552","is_oa":false,"landing_page_url":"https://hdl.handle.net/10356/202552","pdf_url":null,"source":{"id":"https://openalex.org/S4306402609","display_name":"DR-NTU (Nanyang Technological University)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I172675005","host_organization_name":"Nanyang Technological University","host_organization_lineage":["https://openalex.org/I172675005"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":null,"raw_type":"Conference Paper"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":40,"referenced_works":["https://openalex.org/W12634471","https://openalex.org/W1773149199","https://openalex.org/W1977295328","https://openalex.org/W2017814585","https://openalex.org/W2047643928","https://openalex.org/W2092939357","https://openalex.org/W2138011018","https://openalex.org/W2155904486","https://openalex.org/W2533598788","https://openalex.org/W2963090248","https://openalex.org/W3120348034","https://openalex.org/W3166986030","https://openalex.org/W3198377975","https://openalex.org/W3207493267","https://openalex.org/W4200632612","https://openalex.org/W4205991051","https://openalex.org/W4210894218","https://openalex.org/W4284677642","https://openalex.org/W4312310776","https://openalex.org/W4312480274","https://openalex.org/W4312972638","https://openalex.org/W4313181088","https://openalex.org/W4382458283","https://openalex.org/W4382468457","https://openalex.org/W4382999123","https://openalex.org/W4386071498","https://openalex.org/W4386072101","https://openalex.org/W4386072185","https://openalex.org/W4386076374","https://openalex.org/W4386076609","https://openalex.org/W4386076665","https://openalex.org/W4386790226","https://openalex.org/W4390871860","https://openalex.org/W4390872306","https://openalex.org/W4391451889","https://openalex.org/W4392270539","https://openalex.org/W4393148064","https://openalex.org/W4400524574","https://openalex.org/W4402781582","https://openalex.org/W4403792105"],"related_works":[],"abstract_inverted_index":{"Recently,":[0],"prompt":[1,58,79,101],"learning":[2,68],"has":[3],"achieved":[4],"remarkable":[5],"success":[6],"in":[7,39,175],"adapting":[8],"pre-trained":[9],"Vision-Language":[10],"Models":[11],"(VLMs)":[12],"to":[13,23,70,87,110,151],"downstream":[14,25,49,95,176,183],"tasks":[15],"such":[16],"as":[17,169],"image":[18],"classification.":[19],"However,":[20],"its":[21],"application":[22],"the":[24,36,48,89,94,100,118,124,138,161],"Image-Text":[26],"Retrieval":[27,164],"(ITR)":[28],"task":[29],"is":[30],"more":[31],"challenging.":[32],"We":[33],"find":[34],"that":[35,199],"challenge":[37],"lies":[38],"discriminating":[40],"both":[41,82],"fine-grained":[42,112],"attributes":[43],"and":[44,84,107,135,186,209],"similar":[45],"subcategories":[46],"of":[47,91,126],"data.":[50],"To":[51,155],"address":[52],"this":[53],"challenge,":[54],"we":[55,159],"propose":[56],"Dual":[57],"Learning":[59],"with":[60,148,190],"Joint":[61],"Category-Attribute":[62],"Reweighting":[63],"(DCAR),":[64],"a":[65,170],"novel":[66],"dual-prompt":[67],"framework":[69,76],"achieve":[71],"precise":[72],"image-text":[73],"matching.":[74],"The":[75,207],"dynamically":[77,122],"adjusts":[78],"vectors":[80],"from":[81,145],"semantic":[83],"visual":[85],"dimensions":[86],"improve":[88],"performance":[90,203],"CLIP":[92],"on":[93,99,130,196],"ITR":[96,174],"task.":[97],"Based":[98],"paradigm,":[102],"DCAR":[103,200],"jointly":[104],"optimizes":[105],"attribute":[106,119,127,192],"category":[108,139],"features":[109],"enhance":[111],"representation":[113],"learning.":[114],"Specifically,":[115],"(1)":[116],"at":[117,137,213],"level,":[120,140],"it":[121,141],"updates":[123],"weights":[125],"descriptions":[128],"based":[129],"text-image":[131],"mutual":[132],"information":[133],"correlation;":[134],"(2)":[136],"introduces":[142],"negative":[143],"samples":[144],"multiple":[146],"perspectives":[147],"category-matching":[149],"weighting":[150],"learn":[152],"subcategory":[153],"distinctions.":[154],"validate":[156],"our":[157],"method,":[158],"construct":[160],"Fine-class":[162],"Described":[163],"Dataset":[165],"(FDRD),":[166],"which":[167],"serves":[168],"challenging":[171],"benchmark":[172],"for":[173],"data":[177,210],"domains.":[178],"It":[179],"covers":[180],"over":[181,204],"1,500":[182],"fine":[184],"categories":[185],"230,000":[187],"image-caption":[188],"pairs":[189],"detailed":[191],"annotations.":[193],"Extensive":[194],"experiments":[195],"FDRD":[197],"demonstrate":[198],"achieves":[201],"state-of-the-art":[202],"existing":[205],"baselines.":[206],"code":[208],"are":[211],"available":[212],"https://github.com/wyf202322/DCAR.":[214]},"counts_by_year":[],"updated_date":"2026-03-27T05:58:40.876381","created_date":"2025-10-25T00:00:00"}
