{"id":"https://openalex.org/W4406613241","doi":"https://doi.org/10.1145/3712700","title":"Integrated Image-Text Augmentation for Few-Shot Learning in Vision-Language Models","display_name":"Integrated Image-Text Augmentation for Few-Shot Learning in Vision-Language Models","publication_year":2025,"publication_date":"2025-01-20","ids":{"openalex":"https://openalex.org/W4406613241","doi":"https://doi.org/10.1145/3712700"},"language":"en","primary_location":{"id":"doi:10.1145/3712700","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3712700","pdf_url":null,"source":{"id":"https://openalex.org/S2492086750","display_name":"ACM Transactions on Intelligent Systems and Technology","issn_l":"2157-6904","issn":["2157-6904","2157-6912"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM Transactions on Intelligent Systems and Technology","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"hybrid","oa_url":"https://doi.org/10.1145/3712700","any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5101985101","display_name":"Ran Wang","orcid":"https://orcid.org/0009-0004-8397-3410"},"institutions":[{"id":"https://openalex.org/I114017466","display_name":"University of Technology Sydney","ror":"https://ror.org/03f0f6041","country_code":"AU","type":"education","lineage":["https://openalex.org/I114017466"]}],"countries":["AU"],"is_corresponding":true,"raw_author_name":"Ran Wang","raw_affiliation_strings":["Australian Artificial Intelligence Institute, Faculty of Engineering and IT, University of Technology Sydney, Sydney, Australia","Australian Artificial Intelligence Institute, Faculty of Engineering and IT, University of Technology Sydney, Australia"],"raw_orcid":"https://orcid.org/0009-0004-8397-3410","affiliations":[{"raw_affiliation_string":"Australian Artificial Intelligence Institute, Faculty of Engineering and IT, University of Technology Sydney, Sydney, Australia","institution_ids":["https://openalex.org/I114017466"]},{"raw_affiliation_string":"Australian Artificial Intelligence Institute, Faculty of Engineering and IT, University of Technology Sydney, Australia","institution_ids":["https://openalex.org/I114017466"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5071218354","display_name":"Hua Zuo","orcid":"https://orcid.org/0000-0002-9122-0775"},"institutions":[{"id":"https://openalex.org/I114017466","display_name":"University of Technology Sydney","ror":"https://ror.org/03f0f6041","country_code":"AU","type":"education","lineage":["https://openalex.org/I114017466"]}],"countries":["AU"],"is_corresponding":false,"raw_author_name":"Hua Zuo","raw_affiliation_strings":["Australian Artificial Intelligence Institute, Faculty of Engineering and IT, University of Technology Sydney, Sydney, Australia","Australian Artificial Intelligence Institute, Faculty of Engineering and IT, University of Technology Sydney, Australia"],"raw_orcid":"https://orcid.org/0000-0002-9122-0775","affiliations":[{"raw_affiliation_string":"Australian Artificial Intelligence Institute, Faculty of Engineering and IT, University of Technology Sydney, Sydney, Australia","institution_ids":["https://openalex.org/I114017466"]},{"raw_affiliation_string":"Australian Artificial Intelligence Institute, Faculty of Engineering and IT, University of Technology Sydney, Australia","institution_ids":["https://openalex.org/I114017466"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5087852871","display_name":"Zhen Fang","orcid":"https://orcid.org/0000-0003-0602-6255"},"institutions":[{"id":"https://openalex.org/I114017466","display_name":"University of Technology Sydney","ror":"https://ror.org/03f0f6041","country_code":"AU","type":"education","lineage":["https://openalex.org/I114017466"]}],"countries":["AU"],"is_corresponding":false,"raw_author_name":"Zhen Fang","raw_affiliation_strings":["Australian Artificial Intelligence Institute, Faculty of Engineering and IT, University of Technology Sydney, Sydney, Australia","Australian Artificial Intelligence Institute, Faculty of Engineering and IT, University of Technology Sydney, Australia"],"raw_orcid":"https://orcid.org/0000-0003-0602-6255","affiliations":[{"raw_affiliation_string":"Australian Artificial Intelligence Institute, Faculty of Engineering and IT, University of Technology Sydney, Sydney, Australia","institution_ids":["https://openalex.org/I114017466"]},{"raw_affiliation_string":"Australian Artificial Intelligence Institute, Faculty of Engineering and IT, University of Technology Sydney, Australia","institution_ids":["https://openalex.org/I114017466"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5100675577","display_name":"Jie L\u00fc","orcid":"https://orcid.org/0000-0003-0690-4732"},"institutions":[{"id":"https://openalex.org/I114017466","display_name":"University of Technology Sydney","ror":"https://ror.org/03f0f6041","country_code":"AU","type":"education","lineage":["https://openalex.org/I114017466"]}],"countries":["AU"],"is_corresponding":false,"raw_author_name":"Jie Lu","raw_affiliation_strings":["Australian Artificial Intelligence Institute, Faculty of Engineering and IT, University of Technology Sydney, Sydney, Australia","Australian Artificial Intelligence Institute, Faculty of Engineering and IT, University of Technology Sydney, Australia"],"raw_orcid":"https://orcid.org/0000-0003-0690-4732","affiliations":[{"raw_affiliation_string":"Australian Artificial Intelligence Institute, Faculty of Engineering and IT, University of Technology Sydney, Sydney, Australia","institution_ids":["https://openalex.org/I114017466"]},{"raw_affiliation_string":"Australian Artificial Intelligence Institute, Faculty of Engineering and IT, University of Technology Sydney, Australia","institution_ids":["https://openalex.org/I114017466"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5101985101"],"corresponding_institution_ids":["https://openalex.org/I114017466"],"apc_list":null,"apc_paid":null,"fwci":2.1886,"has_fulltext":false,"cited_by_count":2,"citation_normalized_percentile":{"value":0.85457283,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":98,"max":99},"biblio":{"volume":"16","issue":"2","first_page":"1","last_page":"19"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9991000294685364,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9991000294685364,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11307","display_name":"Domain Adaptation and Few-Shot Learning","score":0.9984999895095825,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11775","display_name":"COVID-19 diagnosis using AI","score":0.9959999918937683,"subfield":{"id":"https://openalex.org/subfields/2741","display_name":"Radiology, Nuclear Medicine and Imaging"},"field":{"id":"https://openalex.org/fields/27","display_name":"Medicine"},"domain":{"id":"https://openalex.org/domains/4","display_name":"Health Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.9111818075180054},{"id":"https://openalex.org/keywords/shot","display_name":"Shot (pellet)","score":0.6476365327835083},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.6228576898574829},{"id":"https://openalex.org/keywords/image","display_name":"Image (mathematics)","score":0.5897078514099121},{"id":"https://openalex.org/keywords/computer-vision","display_name":"Computer vision","score":0.5121395587921143},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.4920189082622528}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.9111818075180054},{"id":"https://openalex.org/C2778344882","wikidata":"https://www.wikidata.org/wiki/Q278938","display_name":"Shot (pellet)","level":2,"score":0.6476365327835083},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6228576898574829},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.5897078514099121},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.5121395587921143},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.4920189082622528},{"id":"https://openalex.org/C178790620","wikidata":"https://www.wikidata.org/wiki/Q11351","display_name":"Organic chemistry","level":1,"score":0.0},{"id":"https://openalex.org/C185592680","wikidata":"https://www.wikidata.org/wiki/Q2329","display_name":"Chemistry","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3712700","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3712700","pdf_url":null,"source":{"id":"https://openalex.org/S2492086750","display_name":"ACM Transactions on Intelligent Systems and Technology","issn_l":"2157-6904","issn":["2157-6904","2157-6912"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM Transactions on Intelligent Systems and Technology","raw_type":"journal-article"}],"best_oa_location":{"id":"doi:10.1145/3712700","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3712700","pdf_url":null,"source":{"id":"https://openalex.org/S2492086750","display_name":"ACM Transactions on Intelligent Systems and Technology","issn_l":"2157-6904","issn":["2157-6904","2157-6912"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM Transactions on Intelligent Systems and Technology","raw_type":"journal-article"},"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G2863769803","display_name":null,"funder_award_id":"DE220101075","funder_id":"https://openalex.org/F4320334704","funder_display_name":"Australian Research Council"}],"funders":[{"id":"https://openalex.org/F4320334704","display_name":"Australian Research Council","ror":"https://ror.org/05mmh0f86"}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":38,"referenced_works":["https://openalex.org/W12634471","https://openalex.org/W1977295328","https://openalex.org/W2017814585","https://openalex.org/W2047643928","https://openalex.org/W2108598243","https://openalex.org/W2138011018","https://openalex.org/W2166049352","https://openalex.org/W2533598788","https://openalex.org/W2625674597","https://openalex.org/W2896457183","https://openalex.org/W2904008038","https://openalex.org/W2910453440","https://openalex.org/W2944223741","https://openalex.org/W2964194231","https://openalex.org/W2970476646","https://openalex.org/W2992308087","https://openalex.org/W3015113763","https://openalex.org/W3034942609","https://openalex.org/W3177525997","https://openalex.org/W3185341429","https://openalex.org/W3198377975","https://openalex.org/W3210129272","https://openalex.org/W4214876417","https://openalex.org/W4284961860","https://openalex.org/W4288116810","https://openalex.org/W4300362655","https://openalex.org/W4302305810","https://openalex.org/W4310924809","https://openalex.org/W4312310776","https://openalex.org/W4312935996","https://openalex.org/W4384697538","https://openalex.org/W4386065554","https://openalex.org/W4394862984","https://openalex.org/W4401075326","https://openalex.org/W4402353758","https://openalex.org/W4403780684","https://openalex.org/W6745136726","https://openalex.org/W6811433417"],"related_works":["https://openalex.org/W2772917594","https://openalex.org/W2036807459","https://openalex.org/W2058170566","https://openalex.org/W2755342338","https://openalex.org/W2166024367","https://openalex.org/W3116076068","https://openalex.org/W2229312674","https://openalex.org/W2951359407","https://openalex.org/W2079911747","https://openalex.org/W1969923398"],"abstract_inverted_index":{"Vision-language":[0],"models,":[1,78],"such":[2],"as":[3],"the":[4,67,84,106,139,142,154,164,174,179,186,193],"Contrastive":[5],"Language-Image":[6],"Pre-Training":[7],"(CLIP)":[8],"model,":[9],"have":[10,55,210],"achieved":[11],"significant":[12,215],"success":[13],"in":[14,23,58,125,173,198,205],"image":[15,34,52,70,77,85,116,207,220],"classification":[16,208],"tasks.":[17,128,202],"CLIP":[18,37,123],"demonstrates":[19],"high":[20],"expressive":[21],"power":[22],"few-shot":[24,59,126,200,206],"learning":[25,60,127,201],"scenarios":[26,209],"due":[27],"to":[28,62,137,169,218],"its":[29],"pairing":[30],"of":[31,47,141],"text":[32,187],"and":[33,102,133,145,157,185,196],"encoders.":[35],"However,":[36,87],"still":[38],"faces":[39],"over-fitting":[40,64],"when":[41],"trained":[42],"with":[43],"a":[44,114],"limited":[45],"number":[46],"samples.":[48],"To":[49,108],"mitigate":[50],"this,":[51],"augmentation":[53,71,117,221],"techniques":[54],"been":[56],"proposed":[57],"tasks":[61],"prevent":[63],"by":[65],"enriching":[66],"dataset.":[68],"Existing":[69],"methods,":[72],"primarily":[73],"designed":[74],"for":[75,88,122],"single-modal":[76],"focus":[79],"solely":[80],"on":[81],"transformations":[82],"within":[83],"itself.":[86],"CLIP,":[89],"merely":[90],"increasing":[91],"visual":[92,171],"variety":[93],"without":[94],"considering":[95],"textual":[96,159],"content":[97],"can":[98],"reduce":[99,146],"generalization":[100],"ability":[101],"may":[103],"even":[104],"mislead":[105],"model.":[107],"address":[109],"this":[110,162],"issue,":[111],"we":[112],"introduce":[113],"novel":[115],"approach\u2014Integrated":[118],"Image-Text":[119],"Augmentation":[120],"(ITA)\u2014":[121],"model":[124,165],"This":[129,189],"method":[130],"generates":[131],"new":[132],"diverse":[134],"augmented":[135,155],"images":[136,156,175],"increase":[138],"diversity":[140],"training":[143],"data":[144],"over-fitting.":[147],"Additionally,":[148],"ITA":[149,213],"establishes":[150],"an":[151],"alignment":[152],"between":[153,182],"their":[158],"descriptions.":[160,188],"Through":[161],"alignment,":[163],"not":[166],"only":[167],"learns":[168],"recognize":[170],"elements":[172,184],"but":[176],"also":[177],"understands":[178],"semantic":[180],"connections":[181],"these":[183],"dual-modal":[190],"approach":[191],"enhances":[192],"model\u2019s":[194],"flexibility":[195],"accuracy":[197],"processing":[199],"Extensive":[203],"experiments":[204],"demonstrated":[211],"that":[212],"shows":[214],"improvements":[216],"compared":[217],"various":[219],"techniques.":[222]},"counts_by_year":[{"year":2026,"cited_by_count":2}],"updated_date":"2026-05-05T08:41:31.759640","created_date":"2025-10-10T00:00:00"}
