{"id":"https://openalex.org/W4403791442","doi":"https://doi.org/10.1145/3664647.3681387","title":"Calibrating Prompt from History for Continual Vision-Language Retrieval and Grounding","display_name":"Calibrating Prompt from History for Continual Vision-Language Retrieval and Grounding","publication_year":2024,"publication_date":"2024-10-26","ids":{"openalex":"https://openalex.org/W4403791442","doi":"https://doi.org/10.1145/3664647.3681387"},"language":"en","primary_location":{"id":"doi:10.1145/3664647.3681387","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3664647.3681387","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 32nd ACM International Conference on Multimedia","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5102011966","display_name":"Tao Jin","orcid":"https://orcid.org/0000-0003-3564-1628"},"institutions":[{"id":"https://openalex.org/I76130692","display_name":"Zhejiang University","ror":"https://ror.org/00a2xv884","country_code":"CN","type":"education","lineage":["https://openalex.org/I76130692"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Tao Jin","raw_affiliation_strings":["Zhejiang University, Hangzhou, China"],"affiliations":[{"raw_affiliation_string":"Zhejiang University, Hangzhou, China","institution_ids":["https://openalex.org/I76130692"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5050690349","display_name":"W. B. Yan","orcid":null},"institutions":[{"id":"https://openalex.org/I76130692","display_name":"Zhejiang University","ror":"https://ror.org/00a2xv884","country_code":"CN","type":"education","lineage":["https://openalex.org/I76130692"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Weicai Yan","raw_affiliation_strings":["Zhejiang University, Hangzhou, China"],"affiliations":[{"raw_affiliation_string":"Zhejiang University, Hangzhou, China","institution_ids":["https://openalex.org/I76130692"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5076752203","display_name":"Ye Wang","orcid":"https://orcid.org/0000-0003-1054-4063"},"institutions":[{"id":"https://openalex.org/I76130692","display_name":"Zhejiang University","ror":"https://ror.org/00a2xv884","country_code":"CN","type":"education","lineage":["https://openalex.org/I76130692"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Ye Wang","raw_affiliation_strings":["Zhejiang University, Hangzhou, China"],"affiliations":[{"raw_affiliation_string":"Zhejiang University, Hangzhou, China","institution_ids":["https://openalex.org/I76130692"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5013362450","display_name":"Sihang Cai","orcid":null},"institutions":[{"id":"https://openalex.org/I76130692","display_name":"Zhejiang University","ror":"https://ror.org/00a2xv884","country_code":"CN","type":"education","lineage":["https://openalex.org/I76130692"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Sihang Cai","raw_affiliation_strings":["Zhejiang University, Hangzhou, China"],"affiliations":[{"raw_affiliation_string":"Zhejiang University, Hangzhou, China","institution_ids":["https://openalex.org/I76130692"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5106159943","display_name":"Qifan Shuai","orcid":null},"institutions":[{"id":"https://openalex.org/I76569877","display_name":"Southeast University","ror":"https://ror.org/04ct4d772","country_code":"CN","type":"education","lineage":["https://openalex.org/I76569877"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Qifan Shuai","raw_affiliation_strings":["Southeast University Cheng Xian College, Nanjing, China"],"affiliations":[{"raw_affiliation_string":"Southeast University Cheng Xian College, Nanjing, China","institution_ids":["https://openalex.org/I76569877"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5079260216","display_name":"Zhou Zhao","orcid":"https://orcid.org/0000-0001-6121-0384"},"institutions":[{"id":"https://openalex.org/I76130692","display_name":"Zhejiang University","ror":"https://ror.org/00a2xv884","country_code":"CN","type":"education","lineage":["https://openalex.org/I76130692"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Zhou Zhao","raw_affiliation_strings":["Zhejiang University, Hangzhou, China"],"affiliations":[{"raw_affiliation_string":"Zhejiang University, Hangzhou, China","institution_ids":["https://openalex.org/I76130692"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5102011966"],"corresponding_institution_ids":["https://openalex.org/I76130692"],"apc_list":null,"apc_paid":null,"fwci":0.8249,"has_fulltext":false,"cited_by_count":3,"citation_normalized_percentile":{"value":0.74196769,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":91,"max":96},"biblio":{"volume":null,"issue":null,"first_page":"4302","last_page":"4311"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11307","display_name":"Domain Adaptation and Few-Shot Learning","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9915000200271606,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.662773847579956},{"id":"https://openalex.org/keywords/ground","display_name":"Ground","score":0.6623843312263489},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.4851011037826538},{"id":"https://openalex.org/keywords/computer-vision","display_name":"Computer vision","score":0.4047061800956726},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.3255433738231659},{"id":"https://openalex.org/keywords/engineering","display_name":"Engineering","score":0.16557413339614868},{"id":"https://openalex.org/keywords/electrical-engineering","display_name":"Electrical engineering","score":0.09393775463104248}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.662773847579956},{"id":"https://openalex.org/C168993435","wikidata":"https://www.wikidata.org/wiki/Q6501125","display_name":"Ground","level":2,"score":0.6623843312263489},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4851011037826538},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.4047061800956726},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.3255433738231659},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.16557413339614868},{"id":"https://openalex.org/C119599485","wikidata":"https://www.wikidata.org/wiki/Q43035","display_name":"Electrical engineering","level":1,"score":0.09393775463104248}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3664647.3681387","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3664647.3681387","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 32nd ACM International Conference on Multimedia","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"display_name":"Quality Education","score":0.5699999928474426,"id":"https://metadata.un.org/sdg/4"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":33,"referenced_works":["https://openalex.org/W2116522068","https://openalex.org/W2425121537","https://openalex.org/W2473930607","https://openalex.org/W2554863749","https://openalex.org/W2560647685","https://openalex.org/W2954929116","https://openalex.org/W2963588172","https://openalex.org/W2997591391","https://openalex.org/W2998356391","https://openalex.org/W3035524453","https://openalex.org/W3138516171","https://openalex.org/W3177032574","https://openalex.org/W4225484930","https://openalex.org/W4285247752","https://openalex.org/W4285606530","https://openalex.org/W4292433237","https://openalex.org/W4312238419","https://openalex.org/W4312351187","https://openalex.org/W4312956471","https://openalex.org/W4362653417","https://openalex.org/W4385570057","https://openalex.org/W4385571188","https://openalex.org/W4385571875","https://openalex.org/W4386071547","https://openalex.org/W4390872020","https://openalex.org/W4390873467","https://openalex.org/W4390873659","https://openalex.org/W4393159604","https://openalex.org/W4402702936","https://openalex.org/W4402754025","https://openalex.org/W4403791285","https://openalex.org/W4403791608","https://openalex.org/W4403792076"],"related_works":["https://openalex.org/W2772917594","https://openalex.org/W2036807459","https://openalex.org/W2058170566","https://openalex.org/W2755342338","https://openalex.org/W2166024367","https://openalex.org/W3116076068","https://openalex.org/W2229312674","https://openalex.org/W2951359407","https://openalex.org/W2079911747","https://openalex.org/W1969923398"],"abstract_inverted_index":{"In":[0,36],"the":[1,24,33,70,77,144,156,167,172,175,180,187,194],"field":[2],"of":[3,23,64,124,174,196],"machine":[4],"learning,":[5],"continual":[6,52],"learning":[7,53],"is":[8],"a":[9,118,158],"crucial":[10],"concept":[11],"that":[12,136],"allows":[13],"models":[14],"to":[15,17,39,80,116,140],"adapt":[16],"non-stationary":[18],"data":[19],"distributions.":[20],"However,":[21],"most":[22],"existing":[25,93],"works":[26],"focus":[27],"on":[28],"uni-modal":[29],"settings":[30],"and":[31,61,76,107,213,216,224],"ignore":[32],"multi-modal":[34],"data.":[35],"this":[37,82,85],"paper,":[38],"enable":[40],"neural":[41],"networks":[42],"better":[43],"understand":[44],"diverse":[45],"modalities":[46],"in":[47,92,179],"real-world":[48],"scenario,":[49],"we":[50,68,87,131],"investigate":[51],"for":[54,122,153,162],"two":[55,89,138,176,208,217],"typical":[56],"vision-language":[57,205],"applications,":[58,206],"i.e.":[59],"retrieval":[60,209],"grounding.":[62],"Instead":[63],"conventional":[65],"exemplar-based":[66],"methods,":[67],"leverage":[69],"pre-trained":[71],"transformer":[72],"model":[73],"(e.g.":[74],"CLIP/GLIP)":[75],"prompt":[78],"technique":[79],"tackle":[81],"problem.":[83],"Under":[84],"scheme,":[86],"identify":[88],"critical":[90],"limitations":[91],"methods:":[94],"(1)":[95],"Unfamiliarity":[96],"across":[97],"tasks,":[98],"which":[99,112],"prevents":[100],"task-specific":[101,151],"prompts":[102,123,178,188],"from":[103,189],"achieving":[104],"forward":[105],"propagation;":[106],"(2)":[108],"Heterogeneity":[109],"between":[110],"modalities,":[111],"makes":[113],"it":[114],"difficult":[115],"guarantee":[117],"consistent":[119],"optimization":[120],"direction":[121],"different":[125],"modalities.":[126],"To":[127],"overcome":[128],"these":[129],"constraints,":[130],"design":[132],"Historical":[133],"Prompt":[134],"Calibration":[135],"includes":[137],"objectives":[139],"calibrate":[141],"prompts.":[142],"First,":[143],"intra-modal":[145],"relevance":[146,159],"estimation":[147],"helps":[148],"encode":[149],"sufficient":[150],"information":[152],"prompts,":[154],"with":[155,186],"help":[157],"estimator":[160],"developed":[161],"recognizing":[163],"task":[164,182],"relevance.":[165],"Second,":[166],"inter-modal":[168],"consistency":[169],"alignment":[170],"enhances":[171],"agreement":[173],"modality-specific":[177],"current":[181],"by":[183,203],"contrasting":[184],"them":[185],"previous":[190],"tasks.":[191],"We":[192],"evaluate":[193],"superiority":[195],"our":[197],"strategy":[198],"over":[199],"state-of-the":[200],"arts":[201],"methods":[202],"four":[204],"including":[207],"tasks":[210,219],"(i.e.":[211,220],"image-":[212],"video-text":[214],"retrieval)":[215],"grounding":[218],"referring":[221],"expression":[222],"comprehension":[223],"segmentation).":[225]},"counts_by_year":[{"year":2025,"cited_by_count":1},{"year":2024,"cited_by_count":2}],"updated_date":"2025-12-23T23:11:35.936235","created_date":"2025-10-10T00:00:00"}
