{"id":"https://openalex.org/W4403791712","doi":"https://doi.org/10.1145/3664647.3681624","title":"SSAT-Adapter: Enhancing Vision-Language Model Few-shot Learning with Auxiliary Tasks","display_name":"SSAT-Adapter: Enhancing Vision-Language Model Few-shot Learning with Auxiliary Tasks","publication_year":2024,"publication_date":"2024-10-26","ids":{"openalex":"https://openalex.org/W4403791712","doi":"https://doi.org/10.1145/3664647.3681624"},"language":"en","primary_location":{"id":"doi:10.1145/3664647.3681624","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3664647.3681624","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3664647.3681624?download=true","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 32nd ACM International Conference on Multimedia","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://dl.acm.org/doi/pdf/10.1145/3664647.3681624?download=true","any_repository_has_fulltext":null},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5007216116","display_name":"B. Chen","orcid":"https://orcid.org/0009-0001-5062-2890"},"institutions":[{"id":"https://openalex.org/I154130895","display_name":"University of Auckland","ror":"https://ror.org/03b94tp07","country_code":"NZ","type":"education","lineage":["https://openalex.org/I154130895"]}],"countries":["NZ"],"is_corresponding":true,"raw_author_name":"Bowen Chen","raw_affiliation_strings":["The University of Auckland, Auckland, New Zealand"],"affiliations":[{"raw_affiliation_string":"The University of Auckland, Auckland, New Zealand","institution_ids":["https://openalex.org/I154130895"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5017570709","display_name":"Yun Sing Koh","orcid":"https://orcid.org/0000-0001-7256-4049"},"institutions":[{"id":"https://openalex.org/I154130895","display_name":"University of Auckland","ror":"https://ror.org/03b94tp07","country_code":"NZ","type":"education","lineage":["https://openalex.org/I154130895"]}],"countries":["NZ"],"is_corresponding":false,"raw_author_name":"Yun Sing Koh","raw_affiliation_strings":["The University of Auckland, Auckland, New Zealand"],"affiliations":[{"raw_affiliation_string":"The University of Auckland, Auckland, New Zealand","institution_ids":["https://openalex.org/I154130895"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5016115576","display_name":"Gillian Dobbie","orcid":"https://orcid.org/0000-0001-7245-0367"},"institutions":[{"id":"https://openalex.org/I154130895","display_name":"University of Auckland","ror":"https://ror.org/03b94tp07","country_code":"NZ","type":"education","lineage":["https://openalex.org/I154130895"]}],"countries":["NZ"],"is_corresponding":false,"raw_author_name":"Gillian Dobbie","raw_affiliation_strings":["The University of Auckland, Auckland, New Zealand"],"affiliations":[{"raw_affiliation_string":"The University of Auckland, Auckland, New Zealand","institution_ids":["https://openalex.org/I154130895"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5007216116"],"corresponding_institution_ids":["https://openalex.org/I154130895"],"apc_list":null,"apc_paid":null,"fwci":0.3603,"has_fulltext":true,"cited_by_count":1,"citation_normalized_percentile":{"value":0.68309219,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":91,"max":95},"biblio":{"volume":null,"issue":null,"first_page":"1004","last_page":"1013"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11307","display_name":"Domain Adaptation and Few-Shot Learning","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11307","display_name":"Domain Adaptation and Few-Shot Learning","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9995999932289124,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10627","display_name":"Advanced Image and Video Retrieval Techniques","score":0.9908999800682068,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/adapter","display_name":"Adapter (computing)","score":0.8907835483551025},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7992563247680664},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.4530876874923706},{"id":"https://openalex.org/keywords/language-model","display_name":"Language model","score":0.422479510307312},{"id":"https://openalex.org/keywords/human\u2013computer-interaction","display_name":"Human\u2013computer interaction","score":0.40304499864578247},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.3382064700126648},{"id":"https://openalex.org/keywords/computer-hardware","display_name":"Computer hardware","score":0.1957576870918274}],"concepts":[{"id":"https://openalex.org/C177284502","wikidata":"https://www.wikidata.org/wiki/Q1005390","display_name":"Adapter (computing)","level":2,"score":0.8907835483551025},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7992563247680664},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4530876874923706},{"id":"https://openalex.org/C137293760","wikidata":"https://www.wikidata.org/wiki/Q3621696","display_name":"Language model","level":2,"score":0.422479510307312},{"id":"https://openalex.org/C107457646","wikidata":"https://www.wikidata.org/wiki/Q207434","display_name":"Human\u2013computer interaction","level":1,"score":0.40304499864578247},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.3382064700126648},{"id":"https://openalex.org/C9390403","wikidata":"https://www.wikidata.org/wiki/Q3966","display_name":"Computer hardware","level":1,"score":0.1957576870918274}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3664647.3681624","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3664647.3681624","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3664647.3681624?download=true","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 32nd ACM International Conference on Multimedia","raw_type":"proceedings-article"}],"best_oa_location":{"id":"doi:10.1145/3664647.3681624","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3664647.3681624","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3664647.3681624?download=true","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 32nd ACM International Conference on Multimedia","raw_type":"proceedings-article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":true,"grobid_xml":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4403791712.pdf","grobid_xml":"https://content.openalex.org/works/W4403791712.grobid-xml"},"referenced_works_count":44,"referenced_works":["https://openalex.org/W1977295328","https://openalex.org/W2047643928","https://openalex.org/W2108598243","https://openalex.org/W2138011018","https://openalex.org/W2155904486","https://openalex.org/W2533598788","https://openalex.org/W2962843773","https://openalex.org/W2964194231","https://openalex.org/W2999905431","https://openalex.org/W3015240477","https://openalex.org/W3035574324","https://openalex.org/W3035982802","https://openalex.org/W3042563449","https://openalex.org/W3090449556","https://openalex.org/W3118813946","https://openalex.org/W3119397537","https://openalex.org/W3141797743","https://openalex.org/W3167766009","https://openalex.org/W3173908982","https://openalex.org/W3186163092","https://openalex.org/W3198377975","https://openalex.org/W3198675127","https://openalex.org/W3202399036","https://openalex.org/W3212042142","https://openalex.org/W3214038996","https://openalex.org/W3214536939","https://openalex.org/W4200629618","https://openalex.org/W4221086122","https://openalex.org/W4283313485","https://openalex.org/W4292121845","https://openalex.org/W4310415316","https://openalex.org/W4312310776","https://openalex.org/W4313175608","https://openalex.org/W4313887231","https://openalex.org/W4362496215","https://openalex.org/W4366593985","https://openalex.org/W4379409172","https://openalex.org/W4386066684","https://openalex.org/W4386075985","https://openalex.org/W4386076671","https://openalex.org/W4386790226","https://openalex.org/W4390873802","https://openalex.org/W4390874497","https://openalex.org/W4393147331"],"related_works":["https://openalex.org/W2133028525","https://openalex.org/W4229060448","https://openalex.org/W4306381730","https://openalex.org/W2981692913","https://openalex.org/W3044188621","https://openalex.org/W3184035966","https://openalex.org/W2485605994","https://openalex.org/W2160602540","https://openalex.org/W3204019825","https://openalex.org/W4321593827"],"abstract_inverted_index":{"Traditional":[0],"deep":[1],"learning":[2,8,148],"models":[3],"often":[4],"struggle":[5],"in":[6,29,58,83,176],"few-shot":[7,30,59,84,147,177],"scenarios,":[9],"where":[10],"limited":[11,41,47],"labeled":[12,42],"data":[13],"is":[14],"available.":[15],"While":[16],"the":[17,40,53,110,119,144,167],"Contrastive":[18],"Language-Image":[19],"Pre-training":[20],"(CLIP)":[21],"model":[22],"demonstrates":[23],"impressive":[24],"zero-shot":[25],"capabilities,":[26],"its":[27],"performance":[28,80],"scenarios":[31],"remains":[32],"limited.":[33],"Existing":[34],"methods":[35],"primarily":[36],"aim":[37],"to":[38,72,91,104,116,131,172],"leverage":[39],"dataset,":[43],"but":[44],"this":[45],"offers":[46],"potential":[48],"for":[49],"improvement.":[50],"To":[51],"overcome":[52],"limitations":[54],"of":[55,121,153,169],"small":[56],"datasets":[57],"learning,":[60],"we":[61],"introduce":[62],"a":[63],"novel":[64],"framework,":[65],"SSAT-Adapter,":[66],"that":[67],"leverages":[68],"CLIP's":[69,79,88,106,174],"language":[70,89],"understanding":[71,90],"generate":[73],"informative":[74],"auxiliary":[75,99],"tasks":[76,165],"and":[77,81,113],"improve":[78],"adaptability":[82,175],"settings.":[85],"We":[86],"utilize":[87],"create":[92],"decision-boundary-focused":[93],"image":[94,157,178],"latents.":[95],"These":[96],"latents":[97],"form":[98],"tasks,":[100,134],"including":[101],"inter-class":[102],"instances":[103,115],"bridge":[105],"pre-trained":[107],"knowledge":[108],"with":[109],"provided":[111],"examples,":[112],"intra-class":[114],"subtly":[117],"expand":[118],"representation":[120],"target":[122],"classes.":[123],"A":[124],"self-paced":[125],"training":[126],"regime,":[127],"progressing":[128],"from":[129],"easier":[130],"more":[132],"complex":[133],"further":[135],"promotes":[136],"robust":[137],"learning.":[138],"Experiments":[139],"show":[140],"our":[141,170],"framework":[142],"outperforms":[143],"state-of-the-art":[145],"online":[146],"method":[149],"by":[150],"an":[151],"average":[152],"2.2%":[154],"on":[155,163],"eleven":[156],"classification":[158],"datasets.":[159],"Further":[160],"ablation":[161],"studies":[162],"various":[164],"demonstrate":[166],"effectiveness":[168],"approach":[171],"enhance":[173],"classification.":[179]},"counts_by_year":[{"year":2025,"cited_by_count":1}],"updated_date":"2026-03-11T06:11:40.159057","created_date":"2025-10-10T00:00:00"}
