{"id":"https://openalex.org/W4403780913","doi":"https://doi.org/10.1145/3664647.3681135","title":"VeCAF: Vision-language Collaborative Active Finetuning with Training Objective Awareness","display_name":"VeCAF: Vision-language Collaborative Active Finetuning with Training Objective Awareness","publication_year":2024,"publication_date":"2024-10-26","ids":{"openalex":"https://openalex.org/W4403780913","doi":"https://doi.org/10.1145/3664647.3681135"},"language":"en","primary_location":{"id":"doi:10.1145/3664647.3681135","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3664647.3681135","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 32nd ACM International Conference on Multimedia","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5101803269","display_name":"Rongyu Zhang","orcid":"https://orcid.org/0000-0002-9174-1765"},"institutions":[{"id":"https://openalex.org/I20231570","display_name":"Peking University","ror":"https://ror.org/02v51f717","country_code":"CN","type":"education","lineage":["https://openalex.org/I20231570"]},{"id":"https://openalex.org/I881766915","display_name":"Nanjing University","ror":"https://ror.org/01rxvg760","country_code":"CN","type":"education","lineage":["https://openalex.org/I881766915"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Rongyu Zhang","raw_affiliation_strings":["Nanjing University &amp; Peking University, Nanjing, China"],"affiliations":[{"raw_affiliation_string":"Nanjing University &amp; Peking University, Nanjing, China","institution_ids":["https://openalex.org/I881766915","https://openalex.org/I20231570"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101649092","display_name":"Zefan Cai","orcid":"https://orcid.org/0009-0009-8849-8854"},"institutions":[{"id":"https://openalex.org/I135310074","display_name":"University of Wisconsin\u2013Madison","ror":"https://ror.org/01y2jtd41","country_code":"US","type":"education","lineage":["https://openalex.org/I135310074"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Zefan Cai","raw_affiliation_strings":["University of Wisconsin - Madison &amp; Peking University, Madison, WI, USA"],"affiliations":[{"raw_affiliation_string":"University of Wisconsin - Madison &amp; Peking University, Madison, WI, USA","institution_ids":["https://openalex.org/I135310074"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5076154259","display_name":"Huanrui Yang","orcid":"https://orcid.org/0000-0002-3384-4512"},"institutions":[{"id":"https://openalex.org/I95457486","display_name":"University of California, Berkeley","ror":"https://ror.org/01an7q238","country_code":"US","type":"education","lineage":["https://openalex.org/I95457486"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Huanrui Yang","raw_affiliation_strings":["University of California, Berkeley &amp; University of Arizona, Berkeley, CA, USA"],"affiliations":[{"raw_affiliation_string":"University of California, Berkeley &amp; University of Arizona, Berkeley, CA, USA","institution_ids":["https://openalex.org/I95457486"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5104243535","display_name":"Zidong Liu","orcid":null},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Zidong Liu","raw_affiliation_strings":["Tsinghua University, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Tsinghua University, Beijing, China","institution_ids":["https://openalex.org/I99065089"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5026912892","display_name":"Denis Gudovskiy","orcid":"https://orcid.org/0000-0002-6829-6667"},"institutions":[{"id":"https://openalex.org/I4210095956","display_name":"Panasonic (United States)","ror":"https://ror.org/00pvgrv63","country_code":"US","type":"company","lineage":["https://openalex.org/I1283155146","https://openalex.org/I4210095956"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Denis Gudovskiy","raw_affiliation_strings":["Panasonic Corporation, Mountainview, CA, USA"],"affiliations":[{"raw_affiliation_string":"Panasonic Corporation, Mountainview, CA, USA","institution_ids":["https://openalex.org/I4210095956"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5044817132","display_name":"Tomoyuki Okuno","orcid":null},"institutions":[{"id":"https://openalex.org/I1283155146","display_name":"Panasonic (Japan)","ror":"https://ror.org/011tm7n37","country_code":"JP","type":"company","lineage":["https://openalex.org/I1283155146"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Tomoyuki Okuno","raw_affiliation_strings":["Panasonic Corporation, Osaka, Japan"],"affiliations":[{"raw_affiliation_string":"Panasonic Corporation, Osaka, Japan","institution_ids":["https://openalex.org/I1283155146"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5103590982","display_name":"Yohei Nakata","orcid":null},"institutions":[{"id":"https://openalex.org/I1283155146","display_name":"Panasonic (Japan)","ror":"https://ror.org/011tm7n37","country_code":"JP","type":"company","lineage":["https://openalex.org/I1283155146"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Yohei Nakata","raw_affiliation_strings":["Panasonic Corporation, Osaka, Japan"],"affiliations":[{"raw_affiliation_string":"Panasonic Corporation, Osaka, Japan","institution_ids":["https://openalex.org/I1283155146"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5047285420","display_name":"Kurt Keutzer","orcid":"https://orcid.org/0000-0003-3868-8501"},"institutions":[{"id":"https://openalex.org/I95457486","display_name":"University of California, Berkeley","ror":"https://ror.org/01an7q238","country_code":"US","type":"education","lineage":["https://openalex.org/I95457486"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Kurt Keutzer","raw_affiliation_strings":["University of California, Berkeley, Berkeley, CA, USA"],"affiliations":[{"raw_affiliation_string":"University of California, Berkeley, Berkeley, CA, USA","institution_ids":["https://openalex.org/I95457486"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5021459300","display_name":"Baobao Chang","orcid":"https://orcid.org/0000-0003-2824-6750"},"institutions":[{"id":"https://openalex.org/I20231570","display_name":"Peking University","ror":"https://ror.org/02v51f717","country_code":"CN","type":"education","lineage":["https://openalex.org/I20231570"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Baobao Chang","raw_affiliation_strings":["Peking University, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Peking University, Beijing, China","institution_ids":["https://openalex.org/I20231570"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5084816122","display_name":"Yuan Du","orcid":"https://orcid.org/0000-0002-5316-619X"},"institutions":[{"id":"https://openalex.org/I881766915","display_name":"Nanjing University","ror":"https://ror.org/01rxvg760","country_code":"CN","type":"education","lineage":["https://openalex.org/I881766915"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yuan Du","raw_affiliation_strings":["Nanjing University, Nanjing, China"],"affiliations":[{"raw_affiliation_string":"Nanjing University, Nanjing, China","institution_ids":["https://openalex.org/I881766915"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5039665206","display_name":"Li Du","orcid":"https://orcid.org/0000-0003-2687-6978"},"institutions":[{"id":"https://openalex.org/I881766915","display_name":"Nanjing University","ror":"https://ror.org/01rxvg760","country_code":"CN","type":"education","lineage":["https://openalex.org/I881766915"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Li Du","raw_affiliation_strings":["Nanjing University, Nanjing, China"],"affiliations":[{"raw_affiliation_string":"Nanjing University, Nanjing, China","institution_ids":["https://openalex.org/I881766915"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5013030532","display_name":"Shanghang Zhang","orcid":"https://orcid.org/0000-0003-4047-3526"},"institutions":[{"id":"https://openalex.org/I20231570","display_name":"Peking University","ror":"https://ror.org/02v51f717","country_code":"CN","type":"education","lineage":["https://openalex.org/I20231570"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Shanghang Zhang","raw_affiliation_strings":["Peking University, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Peking University, Beijing, China","institution_ids":["https://openalex.org/I20231570"]}]}],"institutions":[],"countries_distinct_count":3,"institutions_distinct_count":12,"corresponding_author_ids":["https://openalex.org/A5101803269"],"corresponding_institution_ids":["https://openalex.org/I20231570","https://openalex.org/I881766915"],"apc_list":null,"apc_paid":null,"fwci":0.9971,"has_fulltext":false,"cited_by_count":4,"citation_normalized_percentile":{"value":0.77821811,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":91,"max":97},"biblio":{"volume":null,"issue":null,"first_page":"5451","last_page":"5459"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10627","display_name":"Advanced Image and Video Retrieval Techniques","score":0.995199978351593,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10627","display_name":"Advanced Image and Video Retrieval Techniques","score":0.995199978351593,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9926999807357788,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.9922000169754028,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.744652509689331},{"id":"https://openalex.org/keywords/training","display_name":"Training (meteorology)","score":0.4117034673690796},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.3678019046783447},{"id":"https://openalex.org/keywords/human\u2013computer-interaction","display_name":"Human\u2013computer interaction","score":0.3200052082538605}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.744652509689331},{"id":"https://openalex.org/C2777211547","wikidata":"https://www.wikidata.org/wiki/Q17141490","display_name":"Training (meteorology)","level":2,"score":0.4117034673690796},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.3678019046783447},{"id":"https://openalex.org/C107457646","wikidata":"https://www.wikidata.org/wiki/Q207434","display_name":"Human\u2013computer interaction","level":1,"score":0.3200052082538605},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C153294291","wikidata":"https://www.wikidata.org/wiki/Q25261","display_name":"Meteorology","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3664647.3681135","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3664647.3681135","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 32nd ACM International Conference on Multimedia","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"score":0.6899999976158142,"display_name":"Quality Education","id":"https://metadata.un.org/sdg/4"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":21,"referenced_works":["https://openalex.org/W569478347","https://openalex.org/W2108598243","https://openalex.org/W2124244761","https://openalex.org/W2155904486","https://openalex.org/W2471138382","https://openalex.org/W2798820905","https://openalex.org/W2956371155","https://openalex.org/W3093517588","https://openalex.org/W3095809828","https://openalex.org/W3138516171","https://openalex.org/W3159481202","https://openalex.org/W3179550234","https://openalex.org/W3184557372","https://openalex.org/W3216156094","https://openalex.org/W4214705313","https://openalex.org/W4306820534","https://openalex.org/W4312677475","https://openalex.org/W4365817417","https://openalex.org/W4386071709","https://openalex.org/W4390871935","https://openalex.org/W4390873311"],"related_works":["https://openalex.org/W4391375266","https://openalex.org/W2899084033","https://openalex.org/W2748952813","https://openalex.org/W230091440","https://openalex.org/W2390279801","https://openalex.org/W2233261550","https://openalex.org/W4391913857","https://openalex.org/W2358668433","https://openalex.org/W4396701345","https://openalex.org/W2810751659"],"abstract_inverted_index":{"Finetuning":[0],"a":[1,7,36,51],"pretrained":[2],"vision":[3,13],"model":[4,55,63],"(PVM)":[5],"is":[6,157,208],"common":[8],"technique":[9],"for":[10,120],"learning":[11],"downstream":[12],"tasks.":[14,168],"The":[15],"conventional":[16],"finetuning":[17],"process":[18],"with":[19,75,200],"the":[20,58,62,69,72,81,99,104,127,134,148,181,201],"randomly":[21],"sampled":[22],"data":[23,53,77,93,122],"points":[24],"results":[25],"in":[26,161],"diminished":[27],"training":[28,59,177],"efficiency.":[29],"To":[30],"address":[31],"this":[32,67],"drawback,":[33],"we":[34,97],"propose":[35],"novel":[37],"approach,":[38],"Vision-":[39],"languag":[40],"e":[41],"C":[42],"ollaborative":[43],"A":[44],"ctive":[45],"F":[46],"inetuning":[47],"(VeCAF).":[48],"VeCAF":[49,133,155,171],"optimizes":[50],"parametric":[52],"selection":[54,123],"by":[56],"incorporating":[57],"objective":[60],"of":[61,84,91,103,112,129,154,193,204],"being":[64],"tuned.":[65],"Effectively,":[66],"guides":[68],"PVM":[70,117],"towards":[71],"performance":[73,150,183],"goal":[74],"improved":[76],"and":[78,87,108,124,151,164,188],"computational":[79],"efficiency.With":[80],"ever-growing":[82],"feasibility":[83],"acquiring":[85],"labels":[86],"natural":[88],"language":[89],"annotations":[90,114],"image":[92,113,118,166],"through":[94],"web-scale":[95],"crawling,":[96],"exploit":[98],"inherent":[100],"semantic":[101],"richness":[102],"text":[105,110],"embedding":[106],"space":[107],"utilize":[109],"embeddings":[111],"to":[115,137,159,174,179,185],"augment":[116],"features":[119],"better":[121],"finetuning.":[125],"Furthermore,":[126],"flexibility":[128],"text-domain":[130],"augmentation":[131],"gives":[132],"unique":[135],"ability":[136],"handle":[138],"out-of-distribution":[139,165],"scenarios":[140],"without":[141],"external":[142],"augmented":[143],"data.":[144],"Extensive":[145],"experiments":[146],"show":[147],"leading":[149],"high":[152],"efficiency":[153],"that":[156],"superior":[158],"baselines":[160],"both":[162],"in-distribution":[163],"classification":[167],"On":[169],"ImageNet,":[170],"needs":[172],"up":[173],"3.3\u00d7":[175],"less":[176],"batches":[178],"reach":[180],"target":[182],"compared":[184],"full":[186],"fine-tuning":[187,198],"achieves":[189],"an":[190],"accuracy":[191],"improvement":[192],"2.8%":[194],"over":[195],"active":[196],"SOTA":[197],"methods":[199],"same":[202],"number":[203],"batches.":[205],"Our":[206],"code":[207],"now":[209],"available":[210],"at":[211],"https://github.com/RoyZry98/VeCAF-Pytorch.":[212]},"counts_by_year":[{"year":2025,"cited_by_count":1},{"year":2024,"cited_by_count":3}],"updated_date":"2026-03-27T05:58:40.876381","created_date":"2025-10-10T00:00:00"}
