{"id":"https://openalex.org/W4402980933","doi":"https://doi.org/10.1109/icme57554.2024.10687572","title":"MALIP: Improving Few-Shot Image Classification with Multimodal Fusion Enhancement","display_name":"MALIP: Improving Few-Shot Image Classification with Multimodal Fusion Enhancement","publication_year":2024,"publication_date":"2024-07-15","ids":{"openalex":"https://openalex.org/W4402980933","doi":"https://doi.org/10.1109/icme57554.2024.10687572"},"language":"en","primary_location":{"id":"doi:10.1109/icme57554.2024.10687572","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icme57554.2024.10687572","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2024 IEEE International Conference on Multimedia and Expo (ICME)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5111346970","display_name":"Kaifen Cai","orcid":null},"institutions":[{"id":"https://openalex.org/I157773358","display_name":"Sun Yat-sen University","ror":"https://ror.org/0064kty71","country_code":"CN","type":"education","lineage":["https://openalex.org/I157773358"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Kaifen Cai","raw_affiliation_strings":["Sun Yat-Sen University,School of Computer Science and Engineering,Guangzhou,China"],"affiliations":[{"raw_affiliation_string":"Sun Yat-Sen University,School of Computer Science and Engineering,Guangzhou,China","institution_ids":["https://openalex.org/I157773358"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5049156119","display_name":"Kaiyu Song","orcid":null},"institutions":[{"id":"https://openalex.org/I157773358","display_name":"Sun Yat-sen University","ror":"https://ror.org/0064kty71","country_code":"CN","type":"education","lineage":["https://openalex.org/I157773358"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Kaiyu Song","raw_affiliation_strings":["Sun Yat-Sen University,School of Artificial Intelligence,Guangzhou,China"],"affiliations":[{"raw_affiliation_string":"Sun Yat-Sen University,School of Artificial Intelligence,Guangzhou,China","institution_ids":["https://openalex.org/I157773358"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5087294951","display_name":"Yan Pan","orcid":"https://orcid.org/0000-0002-0466-3763"},"institutions":[{"id":"https://openalex.org/I157773358","display_name":"Sun Yat-sen University","ror":"https://ror.org/0064kty71","country_code":"CN","type":"education","lineage":["https://openalex.org/I157773358"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yan Pan","raw_affiliation_strings":["Sun Yat-Sen University,School of Computer Science and Engineering,Guangzhou,China"],"affiliations":[{"raw_affiliation_string":"Sun Yat-Sen University,School of Computer Science and Engineering,Guangzhou,China","institution_ids":["https://openalex.org/I157773358"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5076868018","display_name":"Hanjiang Lai","orcid":"https://orcid.org/0000-0001-8057-6744"},"institutions":[{"id":"https://openalex.org/I157773358","display_name":"Sun Yat-sen University","ror":"https://ror.org/0064kty71","country_code":"CN","type":"education","lineage":["https://openalex.org/I157773358"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Hanjiang Lai","raw_affiliation_strings":["Sun Yat-Sen University,School of Computer Science and Engineering,Guangzhou,China"],"affiliations":[{"raw_affiliation_string":"Sun Yat-Sen University,School of Computer Science and Engineering,Guangzhou,China","institution_ids":["https://openalex.org/I157773358"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5111346970"],"corresponding_institution_ids":["https://openalex.org/I157773358"],"apc_list":null,"apc_paid":null,"fwci":0.6765,"has_fulltext":false,"cited_by_count":2,"citation_normalized_percentile":{"value":0.74581543,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":95,"max":96},"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"6"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T13114","display_name":"Image Processing Techniques and Applications","score":0.9120000004768372,"subfield":{"id":"https://openalex.org/subfields/2214","display_name":"Media Technology"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T13114","display_name":"Image Processing Techniques and Applications","score":0.9120000004768372,"subfield":{"id":"https://openalex.org/subfields/2214","display_name":"Media Technology"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11105","display_name":"Advanced Image Processing Techniques","score":0.90420001745224,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11609","display_name":"Geophysical Methods and Applications","score":0.9003999829292297,"subfield":{"id":"https://openalex.org/subfields/2212","display_name":"Ocean Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.6534005999565125},{"id":"https://openalex.org/keywords/shot","display_name":"Shot (pellet)","score":0.6304463148117065},{"id":"https://openalex.org/keywords/image-fusion","display_name":"Image fusion","score":0.623859703540802},{"id":"https://openalex.org/keywords/fusion","display_name":"Fusion","score":0.5843742489814758},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.563676118850708},{"id":"https://openalex.org/keywords/contextual-image-classification","display_name":"Contextual image classification","score":0.49507227540016174},{"id":"https://openalex.org/keywords/computer-vision","display_name":"Computer vision","score":0.4873986840248108},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.45586562156677246},{"id":"https://openalex.org/keywords/image","display_name":"Image (mathematics)","score":0.43572941422462463},{"id":"https://openalex.org/keywords/materials-science","display_name":"Materials science","score":0.12043660879135132}],"concepts":[{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6534005999565125},{"id":"https://openalex.org/C2778344882","wikidata":"https://www.wikidata.org/wiki/Q278938","display_name":"Shot (pellet)","level":2,"score":0.6304463148117065},{"id":"https://openalex.org/C69744172","wikidata":"https://www.wikidata.org/wiki/Q860822","display_name":"Image fusion","level":3,"score":0.623859703540802},{"id":"https://openalex.org/C158525013","wikidata":"https://www.wikidata.org/wiki/Q2593739","display_name":"Fusion","level":2,"score":0.5843742489814758},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.563676118850708},{"id":"https://openalex.org/C75294576","wikidata":"https://www.wikidata.org/wiki/Q5165192","display_name":"Contextual image classification","level":3,"score":0.49507227540016174},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.4873986840248108},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.45586562156677246},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.43572941422462463},{"id":"https://openalex.org/C192562407","wikidata":"https://www.wikidata.org/wiki/Q228736","display_name":"Materials science","level":0,"score":0.12043660879135132},{"id":"https://openalex.org/C191897082","wikidata":"https://www.wikidata.org/wiki/Q11467","display_name":"Metallurgy","level":1,"score":0.0},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/icme57554.2024.10687572","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icme57554.2024.10687572","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2024 IEEE International Conference on Multimedia and Expo (ICME)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":15,"referenced_works":["https://openalex.org/W12634471","https://openalex.org/W1977295328","https://openalex.org/W2017814585","https://openalex.org/W2047643928","https://openalex.org/W2108598243","https://openalex.org/W2138011018","https://openalex.org/W2194775991","https://openalex.org/W2533598788","https://openalex.org/W2964194231","https://openalex.org/W3097337894","https://openalex.org/W3198377975","https://openalex.org/W4225683910","https://openalex.org/W4313167301","https://openalex.org/W4313175608","https://openalex.org/W4386790226"],"related_works":["https://openalex.org/W2788731446","https://openalex.org/W2204403038","https://openalex.org/W3152170969","https://openalex.org/W2379054866","https://openalex.org/W2549658594","https://openalex.org/W2095903272","https://openalex.org/W2370195708","https://openalex.org/W1490651872","https://openalex.org/W2139242969","https://openalex.org/W2284201331"],"abstract_inverted_index":{"With":[0],"the":[1,33,46,76,99,104,118,125,129,139],"significant":[2],"progress":[3],"in":[4,17,27,50,98],"pre-trained":[5],"visionlanguage":[6],"models":[7],"like":[8],"CLIP,":[9],"recent":[10],"CLIP-based":[11],"methods":[12],"have":[13,23],"shown":[14],"impressive":[15],"performance":[16,78],"few-shot":[18,29,77,100,146],"tasks.":[19],"However,":[20],"CLIPbased":[21],"representations":[22],"a":[24,66,82,90,112],"natural":[25],"gap":[26],"downstream":[28,51],"tasks":[30,52],"due":[31],"to":[32,56,74,102,143],"label-related":[34,58],"multimodal":[35,59,69],"information":[36,122],"scarcity":[37],"caused":[38],"by":[39,111],"limited":[40],"data.":[41,105,127],"We":[42],"then":[43],"question,":[44],"whether":[45],"generative":[47,67],"model":[48,115],"trained":[49,97],"could":[53,95],"be":[54,96],"used":[55],"enhance":[57],"fusion.":[60],"In":[61,128],"this":[62],"paper,":[63],"we":[64,87,107,137],"propose":[65],"model-based":[68],"fusion":[70],"enhancement":[71],"method,":[72],"MALIP,":[73],"improve":[75],"of":[79,141],"CLIP":[80],"via":[81],"Multimodal":[83],"Adapter":[84],"module.":[85],"Specifically,":[86],"first":[88],"leverage":[89],"variational":[91],"autoencoder":[92],"(VAE)":[93],"that":[94],"scenario":[101],"extend":[103],"Then":[106],"create":[108],"adapter":[109],"weights":[110],"key-value":[113],"cache":[114],"constructed":[116],"from":[117],"image":[119,147],"and":[120],"text":[121],"based":[123],"on":[124,134],"expanded":[126],"end,":[130],"through":[131],"extensive":[132],"experiments":[133],"11":[135],"datasets,":[136],"demonstrate":[138],"effectiveness":[140],"MALIP":[142],"perform":[144],"state-of-the-art":[145],"classification.":[148]},"counts_by_year":[{"year":2025,"cited_by_count":2}],"updated_date":"2025-12-21T23:12:01.093139","created_date":"2025-10-10T00:00:00"}
