{"id":"https://openalex.org/W4409310887","doi":"https://doi.org/10.1109/tcsvt.2025.3558785","title":"Vision-Language Adaptive Clustering and Meta-Adaptation for Unsupervised Few-Shot Action Recognition","display_name":"Vision-Language Adaptive Clustering and Meta-Adaptation for Unsupervised Few-Shot Action Recognition","publication_year":2025,"publication_date":"2025-04-09","ids":{"openalex":"https://openalex.org/W4409310887","doi":"https://doi.org/10.1109/tcsvt.2025.3558785"},"language":"en","primary_location":{"id":"doi:10.1109/tcsvt.2025.3558785","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tcsvt.2025.3558785","pdf_url":null,"source":{"id":"https://openalex.org/S115173108","display_name":"IEEE Transactions on Circuits and Systems for Video Technology","issn_l":"1051-8215","issn":["1051-8215","1558-2205"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Circuits and Systems for Video Technology","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":null,"display_name":"Jiaxin Chen","orcid":"https://orcid.org/0009-0000-1448-8172"},"institutions":[{"id":"https://openalex.org/I157773358","display_name":"Sun Yat-sen University","ror":"https://ror.org/0064kty71","country_code":"CN","type":"education","lineage":["https://openalex.org/I157773358"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Jiaxin Chen","raw_affiliation_strings":["School of Computer Science and Technology, Sun Yat-sen University, Guangzhou, China"],"affiliations":[{"raw_affiliation_string":"School of Computer Science and Technology, Sun Yat-sen University, Guangzhou, China","institution_ids":["https://openalex.org/I157773358"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5086339479","display_name":"Jiawen Peng","orcid":null},"institutions":[{"id":"https://openalex.org/I157773358","display_name":"Sun Yat-sen University","ror":"https://ror.org/0064kty71","country_code":"CN","type":"education","lineage":["https://openalex.org/I157773358"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jiawen Peng","raw_affiliation_strings":["School of Computer Science and Technology, Sun Yat-sen University, Guangzhou, China"],"affiliations":[{"raw_affiliation_string":"School of Computer Science and Technology, Sun Yat-sen University, Guangzhou, China","institution_ids":["https://openalex.org/I157773358"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5052439728","display_name":"Yanzuo Lu","orcid":"https://orcid.org/0000-0002-5554-8706"},"institutions":[{"id":"https://openalex.org/I157773358","display_name":"Sun Yat-sen University","ror":"https://ror.org/0064kty71","country_code":"CN","type":"education","lineage":["https://openalex.org/I157773358"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yanzuo Lu","raw_affiliation_strings":["School of Computer Science and Technology, Sun Yat-sen University, Guangzhou, China"],"affiliations":[{"raw_affiliation_string":"School of Computer Science and Technology, Sun Yat-sen University, Guangzhou, China","institution_ids":["https://openalex.org/I157773358"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5034685928","display_name":"Jianhuang Lai","orcid":"https://orcid.org/0000-0003-3883-2024"},"institutions":[{"id":"https://openalex.org/I157773358","display_name":"Sun Yat-sen University","ror":"https://ror.org/0064kty71","country_code":"CN","type":"education","lineage":["https://openalex.org/I157773358"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jian-Huang Lai","raw_affiliation_strings":["School of Computer Science and Technology, Sun Yat-sen University, Guangzhou, China"],"affiliations":[{"raw_affiliation_string":"School of Computer Science and Technology, Sun Yat-sen University, Guangzhou, China","institution_ids":["https://openalex.org/I157773358"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5085704922","display_name":"J. Andy","orcid":"https://orcid.org/0000-0002-0165-8416"},"institutions":[{"id":"https://openalex.org/I157773358","display_name":"Sun Yat-sen University","ror":"https://ror.org/0064kty71","country_code":"CN","type":"education","lineage":["https://openalex.org/I157773358"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Andy J. Ma","raw_affiliation_strings":["School of Computer Science and Technology, Sun Yat-sen University, Guangzhou, China"],"affiliations":[{"raw_affiliation_string":"School of Computer Science and Technology, Sun Yat-sen University, Guangzhou, China","institution_ids":["https://openalex.org/I157773358"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":[],"corresponding_institution_ids":["https://openalex.org/I157773358"],"apc_list":null,"apc_paid":null,"fwci":2.4362,"has_fulltext":false,"cited_by_count":2,"citation_normalized_percentile":{"value":0.88119154,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":95,"max":96},"biblio":{"volume":"35","issue":"9","first_page":"9246","last_page":"9260"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10812","display_name":"Human Pose and Action Recognition","score":0.998199999332428,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10812","display_name":"Human Pose and Action Recognition","score":0.998199999332428,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11512","display_name":"Anomaly Detection Techniques and Applications","score":0.9969000220298767,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10331","display_name":"Video Surveillance and Tracking Methods","score":0.909600019454956,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7629581093788147},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.6778525114059448},{"id":"https://openalex.org/keywords/cluster-analysis","display_name":"Cluster analysis","score":0.6584211587905884},{"id":"https://openalex.org/keywords/action-recognition","display_name":"Action recognition","score":0.5191035270690918},{"id":"https://openalex.org/keywords/adaptation","display_name":"Adaptation (eye)","score":0.51765376329422},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.4967380166053772},{"id":"https://openalex.org/keywords/action","display_name":"Action (physics)","score":0.4268135726451874},{"id":"https://openalex.org/keywords/computer-vision","display_name":"Computer vision","score":0.41468286514282227},{"id":"https://openalex.org/keywords/shot","display_name":"Shot (pellet)","score":0.41170090436935425},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.393617182970047},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.37381511926651},{"id":"https://openalex.org/keywords/class","display_name":"Class (philosophy)","score":0.14733147621154785},{"id":"https://openalex.org/keywords/psychology","display_name":"Psychology","score":0.11159259080886841}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7629581093788147},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6778525114059448},{"id":"https://openalex.org/C73555534","wikidata":"https://www.wikidata.org/wiki/Q622825","display_name":"Cluster analysis","level":2,"score":0.6584211587905884},{"id":"https://openalex.org/C2987834672","wikidata":"https://www.wikidata.org/wiki/Q4677630","display_name":"Action recognition","level":3,"score":0.5191035270690918},{"id":"https://openalex.org/C139807058","wikidata":"https://www.wikidata.org/wiki/Q352374","display_name":"Adaptation (eye)","level":2,"score":0.51765376329422},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.4967380166053772},{"id":"https://openalex.org/C2780791683","wikidata":"https://www.wikidata.org/wiki/Q846785","display_name":"Action (physics)","level":2,"score":0.4268135726451874},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.41468286514282227},{"id":"https://openalex.org/C2778344882","wikidata":"https://www.wikidata.org/wiki/Q278938","display_name":"Shot (pellet)","level":2,"score":0.41170090436935425},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.393617182970047},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.37381511926651},{"id":"https://openalex.org/C2777212361","wikidata":"https://www.wikidata.org/wiki/Q5127848","display_name":"Class (philosophy)","level":2,"score":0.14733147621154785},{"id":"https://openalex.org/C15744967","wikidata":"https://www.wikidata.org/wiki/Q9418","display_name":"Psychology","level":0,"score":0.11159259080886841},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C178790620","wikidata":"https://www.wikidata.org/wiki/Q11351","display_name":"Organic chemistry","level":1,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C169760540","wikidata":"https://www.wikidata.org/wiki/Q207011","display_name":"Neuroscience","level":1,"score":0.0},{"id":"https://openalex.org/C185592680","wikidata":"https://www.wikidata.org/wiki/Q2329","display_name":"Chemistry","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/tcsvt.2025.3558785","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tcsvt.2025.3558785","pdf_url":null,"source":{"id":"https://openalex.org/S115173108","display_name":"IEEE Transactions on Circuits and Systems for Video Technology","issn_l":"1051-8215","issn":["1051-8215","1558-2205"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Circuits and Systems for Video Technology","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G1951972270","display_name":null,"funder_award_id":"U22A2095","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G5118888692","display_name":null,"funder_award_id":"2024A1515011882","funder_id":"https://openalex.org/F4320320671","funder_display_name":"National Research Foundation"},{"id":"https://openalex.org/G5509363380","display_name":null,"funder_award_id":"2024A1515011882","funder_id":"https://openalex.org/F4320337111","funder_display_name":"Basic and Applied Basic Research Foundation of Guangdong Province"},{"id":"https://openalex.org/G7052660066","display_name":null,"funder_award_id":"62276281","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"}],"funders":[{"id":"https://openalex.org/F4320320671","display_name":"National Research Foundation","ror":"https://ror.org/05s0g1g46"},{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"},{"id":"https://openalex.org/F4320337111","display_name":"Basic and Applied Basic Research Foundation of Guangdong Province","ror":null}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":60,"referenced_works":["https://openalex.org/W1996881001","https://openalex.org/W2126579184","https://openalex.org/W2194775991","https://openalex.org/W2507009361","https://openalex.org/W2625366777","https://openalex.org/W2894873912","https://openalex.org/W2948242301","https://openalex.org/W2962934715","https://openalex.org/W2963070905","https://openalex.org/W2963155035","https://openalex.org/W2963524571","https://openalex.org/W2964105864","https://openalex.org/W2990503944","https://openalex.org/W3035374961","https://openalex.org/W3046698617","https://openalex.org/W3081628888","https://openalex.org/W3093455342","https://openalex.org/W3095374178","https://openalex.org/W3110190397","https://openalex.org/W3115964123","https://openalex.org/W3139344044","https://openalex.org/W3145385912","https://openalex.org/W3145659481","https://openalex.org/W3159481202","https://openalex.org/W3170837227","https://openalex.org/W3173271747","https://openalex.org/W3176780013","https://openalex.org/W3184485324","https://openalex.org/W3192801537","https://openalex.org/W3198377975","https://openalex.org/W3200749679","https://openalex.org/W3201944844","https://openalex.org/W4282930896","https://openalex.org/W4285184936","https://openalex.org/W4297697565","https://openalex.org/W4312259618","https://openalex.org/W4312733400","https://openalex.org/W4312959318","https://openalex.org/W4313046672","https://openalex.org/W4313156423","https://openalex.org/W4361762277","https://openalex.org/W4380032301","https://openalex.org/W4382465664","https://openalex.org/W4386047822","https://openalex.org/W4386057769","https://openalex.org/W4386065563","https://openalex.org/W4386065787","https://openalex.org/W4386172426","https://openalex.org/W4386790226","https://openalex.org/W4387695265","https://openalex.org/W4388487066","https://openalex.org/W4390872366","https://openalex.org/W4390872437","https://openalex.org/W4390874107","https://openalex.org/W4392426151","https://openalex.org/W4393864964","https://openalex.org/W4402813010","https://openalex.org/W4402851515","https://openalex.org/W4403844864","https://openalex.org/W4411150224"],"related_works":["https://openalex.org/W2074502265","https://openalex.org/W4214877189","https://openalex.org/W2773965352","https://openalex.org/W2381179799","https://openalex.org/W2980279061","https://openalex.org/W2334685461","https://openalex.org/W2366718574","https://openalex.org/W2359774528","https://openalex.org/W4298312966","https://openalex.org/W2325697621"],"abstract_inverted_index":{"Unsupervised":[0],"few-shot":[1,145,197],"action":[2,19,31,198],"recognition":[3],"is":[4,111,136,154,178],"a":[5,96,102,119,131],"practical":[6],"but":[7],"challenging":[8],"task,":[9],"which":[10,142],"adapts":[11],"knowledge":[12,94,173],"learned":[13],"from":[14],"unlabeled":[15,116],"videos":[16],"to":[17,41,57,138,161],"novel":[18,103,162],"classes":[20,32],"with":[21],"only":[22,74],"limited":[23,71],"labeled":[24],"data.":[25],"Without":[26],"annotated":[27],"data":[28],"of":[29,61,81,95,192],"base":[30],"for":[33,105,114,125,148,156,195],"meta-learning,":[34],"it":[35],"cannot":[36],"achieve":[37],"satisfactory":[38],"performance":[39,66],"due":[40],"the":[42,59,65,75,79,92,158,190],"low-quality":[43],"pseudo-classes":[44,62],"and":[45,63,169],"episodes.":[46],"Though":[47],"vision-language":[48,98,166],"pre-training":[49],"models":[50],"such":[51],"as":[52],"CLIP":[53,100],"can":[54],"be":[55,70],"employed":[56],"improve":[58],"quality":[60],"episodes,":[64],"improvements":[67],"may":[68],"still":[69],"by":[72,118,164,180],"using":[73],"visual":[76],"encoder":[77],"in":[78,101],"absence":[80],"textual":[82],"modality":[83,110],"information.":[84],"In":[85],"this":[86],"paper,":[87],"we":[88],"propose":[89],"fully":[90],"exploiting":[91],"multimodal":[93,181],"pre-trained":[97,159],"model":[99,160],"framework":[104],"unsupervised":[106,196],"video":[107,117],"meta-learning.":[108],"Textual":[109],"automatically":[112],"generated":[113],"each":[115],"video-to-text":[120],"transformer.":[121],"Multimodal":[122],"adaptive":[123,182],"clustering":[124],"episodic":[126,149],"sampling":[127],"(MACES)":[128],"based":[129],"on":[130,186],"video-text":[132],"ensemble":[133],"distance":[134],"metric":[135],"proposed":[137],"accurately":[139],"estimate":[140],"pseudo-classes,":[141],"constructs":[143],"high-quality":[144],"tasks":[146,163],"(episodes)":[147],"training.":[150],"Vision-language":[151],"meta-adaptation":[152],"(VLMA)":[153],"designed":[155],"adapting":[157],"category-aware":[165],"contrastive":[167],"learning":[168],"confidence-based":[170],"reliable":[171],"bidirectional":[172],"distillation.":[174],"The":[175],"final":[176],"prediction":[177],"obtained":[179],"inference.":[183],"Extensive":[184],"experiments":[185],"five":[187],"benchmarks":[188],"demonstrate":[189],"superiority":[191],"our":[193],"method":[194],"recognition.":[199]},"counts_by_year":[{"year":2025,"cited_by_count":2}],"updated_date":"2026-04-09T08:11:56.329763","created_date":"2025-10-10T00:00:00"}
