{"id":"https://openalex.org/W7105839084","doi":"https://doi.org/10.1109/tcsvt.2025.3633729","title":"DPCA-Net: Dual-Prototype Consistency Alignment Network for Robust Multimodal Few-Shot Action Recognition","display_name":"DPCA-Net: Dual-Prototype Consistency Alignment Network for Robust Multimodal Few-Shot Action Recognition","publication_year":2025,"publication_date":"2025-11-17","ids":{"openalex":"https://openalex.org/W7105839084","doi":"https://doi.org/10.1109/tcsvt.2025.3633729"},"language":null,"primary_location":{"id":"doi:10.1109/tcsvt.2025.3633729","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tcsvt.2025.3633729","pdf_url":null,"source":{"id":"https://openalex.org/S115173108","display_name":"IEEE Transactions on Circuits and Systems for Video Technology","issn_l":"1051-8215","issn":["1051-8215","1558-2205"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Circuits and Systems for Video Technology","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":null,"display_name":"Xiaohui Zhao","orcid":"https://orcid.org/0009-0003-7669-5697"},"institutions":[{"id":"https://openalex.org/I194450716","display_name":"Jilin University","ror":"https://ror.org/00js3aw79","country_code":"CN","type":"education","lineage":["https://openalex.org/I194450716"]},{"id":"https://openalex.org/I4210134929","display_name":"Jilin Province Science and Technology Department","ror":"https://ror.org/049x38272","country_code":"CN","type":"government","lineage":["https://openalex.org/I4210134929"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xiaohui Zhao","raw_affiliation_strings":["College of Computer Science and Technology, Jilin University, Changchun, Jilin, China"],"raw_orcid":"https://orcid.org/0009-0003-7669-5697","affiliations":[{"raw_affiliation_string":"College of Computer Science and Technology, Jilin University, Changchun, Jilin, China","institution_ids":["https://openalex.org/I4210134929","https://openalex.org/I194450716"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Xiangyu Meng","orcid":"https://orcid.org/0000-0002-2725-0451"},"institutions":[{"id":"https://openalex.org/I194450716","display_name":"Jilin University","ror":"https://ror.org/00js3aw79","country_code":"CN","type":"education","lineage":["https://openalex.org/I194450716"]},{"id":"https://openalex.org/I4210134929","display_name":"Jilin Province Science and Technology Department","ror":"https://ror.org/049x38272","country_code":"CN","type":"government","lineage":["https://openalex.org/I4210134929"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xiangyu Meng","raw_affiliation_strings":["College of Computer Science and Technology, Jilin University, Changchun, Jilin, China"],"raw_orcid":"https://orcid.org/0000-0002-2725-0451","affiliations":[{"raw_affiliation_string":"College of Computer Science and Technology, Jilin University, Changchun, Jilin, China","institution_ids":["https://openalex.org/I4210134929","https://openalex.org/I194450716"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Qiyang Li","orcid":"https://orcid.org/0009-0001-8959-8605"},"institutions":[{"id":"https://openalex.org/I194450716","display_name":"Jilin University","ror":"https://ror.org/00js3aw79","country_code":"CN","type":"education","lineage":["https://openalex.org/I194450716"]},{"id":"https://openalex.org/I4210134929","display_name":"Jilin Province Science and Technology Department","ror":"https://ror.org/049x38272","country_code":"CN","type":"government","lineage":["https://openalex.org/I4210134929"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Qiyang Li","raw_affiliation_strings":["College of Computer Science and Technology, Jilin University, Changchun, Jilin, China"],"raw_orcid":"https://orcid.org/0009-0001-8959-8605","affiliations":[{"raw_affiliation_string":"College of Computer Science and Technology, Jilin University, Changchun, Jilin, China","institution_ids":["https://openalex.org/I4210134929","https://openalex.org/I194450716"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Xingwang Cai","orcid":"https://orcid.org/0009-0005-8199-1794"},"institutions":[{"id":"https://openalex.org/I4385474403","display_name":"Changchun University of Technology","ror":"https://ror.org/052pakb34","country_code":null,"type":"education","lineage":["https://openalex.org/I4385474403"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xingwang Cai","raw_affiliation_strings":["College of Computer Science and Engineering, Changchun University of Technology, Changchun, Jilin, China"],"raw_orcid":"https://orcid.org/0009-0005-8199-1794","affiliations":[{"raw_affiliation_string":"College of Computer Science and Engineering, Changchun University of Technology, Changchun, Jilin, China","institution_ids":["https://openalex.org/I4385474403"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Deyin Ma","orcid":null},"institutions":[{"id":"https://openalex.org/I4385474403","display_name":"Changchun University of Technology","ror":"https://ror.org/052pakb34","country_code":null,"type":"education","lineage":["https://openalex.org/I4385474403"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Deyin Ma","raw_affiliation_strings":["College of Computer Science and Engineering, Changchun University of Technology, Changchun, Jilin, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"College of Computer Science and Engineering, Changchun University of Technology, Changchun, Jilin, China","institution_ids":["https://openalex.org/I4385474403"]}]},{"author_position":"last","author":{"id":null,"display_name":"Xiaohu Shi","orcid":"https://orcid.org/0000-0002-5115-8137"},"institutions":[{"id":"https://openalex.org/I194450716","display_name":"Jilin University","ror":"https://ror.org/00js3aw79","country_code":"CN","type":"education","lineage":["https://openalex.org/I194450716"]},{"id":"https://openalex.org/I4210134929","display_name":"Jilin Province Science and Technology Department","ror":"https://ror.org/049x38272","country_code":"CN","type":"government","lineage":["https://openalex.org/I4210134929"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xiaohu Shi","raw_affiliation_strings":["College of Computer Science and Technology, Jilin University, Changchun, Jilin, China"],"raw_orcid":"https://orcid.org/0000-0002-5115-8137","affiliations":[{"raw_affiliation_string":"College of Computer Science and Technology, Jilin University, Changchun, Jilin, China","institution_ids":["https://openalex.org/I4210134929","https://openalex.org/I194450716"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":6,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.5205896,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":"36","issue":"4","first_page":"5232","last_page":"5245"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10812","display_name":"Human Pose and Action Recognition","score":0.9607999920845032,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10812","display_name":"Human Pose and Action Recognition","score":0.9607999920845032,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.007899999618530273,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12290","display_name":"Human Motion and Animation","score":0.0044999998062849045,"subfield":{"id":"https://openalex.org/subfields/2207","display_name":"Control and Systems Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/metric","display_name":"Metric (unit)","score":0.6664000153541565},{"id":"https://openalex.org/keywords/consistency","display_name":"Consistency (knowledge bases)","score":0.6660000085830688},{"id":"https://openalex.org/keywords/benchmark","display_name":"Benchmark (surveying)","score":0.5091999769210815},{"id":"https://openalex.org/keywords/image-warping","display_name":"Image warping","score":0.492000013589859},{"id":"https://openalex.org/keywords/representation","display_name":"Representation (politics)","score":0.48750001192092896},{"id":"https://openalex.org/keywords/discriminative-model","display_name":"Discriminative model","score":0.48159998655319214},{"id":"https://openalex.org/keywords/dynamic-time-warping","display_name":"Dynamic time warping","score":0.4729999899864197},{"id":"https://openalex.org/keywords/feature","display_name":"Feature (linguistics)","score":0.4587000012397766},{"id":"https://openalex.org/keywords/feature-learning","display_name":"Feature learning","score":0.4523000121116638}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7718999981880188},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.697700023651123},{"id":"https://openalex.org/C176217482","wikidata":"https://www.wikidata.org/wiki/Q860554","display_name":"Metric (unit)","level":2,"score":0.6664000153541565},{"id":"https://openalex.org/C2776436953","wikidata":"https://www.wikidata.org/wiki/Q5163215","display_name":"Consistency (knowledge bases)","level":2,"score":0.6660000085830688},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.5091999769210815},{"id":"https://openalex.org/C157202957","wikidata":"https://www.wikidata.org/wiki/Q1659609","display_name":"Image warping","level":2,"score":0.492000013589859},{"id":"https://openalex.org/C2776359362","wikidata":"https://www.wikidata.org/wiki/Q2145286","display_name":"Representation (politics)","level":3,"score":0.48750001192092896},{"id":"https://openalex.org/C97931131","wikidata":"https://www.wikidata.org/wiki/Q5282087","display_name":"Discriminative model","level":2,"score":0.48159998655319214},{"id":"https://openalex.org/C88516994","wikidata":"https://www.wikidata.org/wiki/Q1268863","display_name":"Dynamic time warping","level":2,"score":0.4729999899864197},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.46650001406669617},{"id":"https://openalex.org/C2776401178","wikidata":"https://www.wikidata.org/wiki/Q12050496","display_name":"Feature (linguistics)","level":2,"score":0.4587000012397766},{"id":"https://openalex.org/C59404180","wikidata":"https://www.wikidata.org/wiki/Q17013334","display_name":"Feature learning","level":2,"score":0.4523000121116638},{"id":"https://openalex.org/C2779903281","wikidata":"https://www.wikidata.org/wiki/Q6888026","display_name":"Modalities","level":2,"score":0.40630000829696655},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.3986000120639801},{"id":"https://openalex.org/C52622490","wikidata":"https://www.wikidata.org/wiki/Q1026626","display_name":"Feature extraction","level":2,"score":0.39399999380111694},{"id":"https://openalex.org/C108583219","wikidata":"https://www.wikidata.org/wiki/Q197536","display_name":"Deep learning","level":2,"score":0.3776000142097473},{"id":"https://openalex.org/C2780762811","wikidata":"https://www.wikidata.org/wiki/Q1784941","display_name":"Cosine similarity","level":3,"score":0.37279999256134033},{"id":"https://openalex.org/C103278499","wikidata":"https://www.wikidata.org/wiki/Q254465","display_name":"Similarity (geometry)","level":3,"score":0.3686999976634979},{"id":"https://openalex.org/C155542232","wikidata":"https://www.wikidata.org/wiki/Q736111","display_name":"Optical flow","level":3,"score":0.3499999940395355},{"id":"https://openalex.org/C152565575","wikidata":"https://www.wikidata.org/wiki/Q1124538","display_name":"Conditional random field","level":2,"score":0.3409000039100647},{"id":"https://openalex.org/C184898388","wikidata":"https://www.wikidata.org/wiki/Q1435712","display_name":"Pairwise comparison","level":2,"score":0.32030001282691956},{"id":"https://openalex.org/C184337299","wikidata":"https://www.wikidata.org/wiki/Q1437428","display_name":"Semantics (computer science)","level":2,"score":0.3151000142097473},{"id":"https://openalex.org/C2987834672","wikidata":"https://www.wikidata.org/wiki/Q4677630","display_name":"Action recognition","level":3,"score":0.3125999867916107},{"id":"https://openalex.org/C22367795","wikidata":"https://www.wikidata.org/wiki/Q7625208","display_name":"Structured prediction","level":2,"score":0.3037000000476837}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/tcsvt.2025.3633729","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tcsvt.2025.3633729","pdf_url":null,"source":{"id":"https://openalex.org/S115173108","display_name":"IEEE Transactions on Circuits and Systems for Video Technology","issn_l":"1051-8215","issn":["1051-8215","1558-2205"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Circuits and Systems for Video Technology","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G533059332","display_name":null,"funder_award_id":"62272192","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G8186159650","display_name":null,"funder_award_id":"20210201080GX","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Few-shot":[0],"action":[1,190],"recognition":[2],"seeks":[3],"to":[4,34,70,92,121,139],"recognize":[5],"novel":[6],"actions":[7],"with":[8],"limited":[9],"labeled":[10],"examples.":[11],"While":[12],"dual-modal":[13,48],"approaches":[14],"incorporating":[15],"video":[16,94],"and":[17,41,65,74,118,125,136,143,170,186],"textual":[18,100],"modalities":[19,40,69],"offer":[20],"enhanced":[21],"semantic":[22,37],"context,":[23],"existing":[24],"methods":[25],"often":[26],"rely":[27],"on":[28,149,168,173],"naive":[29],"feature":[30],"fusion":[31],"strategies,":[32],"failing":[33],"capture":[35],"deep":[36],"correlations":[38],"across":[39,68],"limiting":[42],"generalization.":[43],"We":[44],"propose":[45],"DPCA-Net,":[46],"a":[47,54],"metric":[49,66,123],"learning":[50,91],"framework":[51],"that":[52,154],"constructs":[53],"unified":[55],"dual-prototype":[56],"consistency":[57,67],"alignment":[58,183],"space.":[59],"DPCA-Net":[60,155],"explicitly":[61],"models":[62],"distributional,":[63],"structural,":[64],"enhance":[71],"prototype":[72,182],"quality":[73],"similarity":[75,138],"estimation.":[76],"It":[77,160],"integrates":[78],"three":[79,150],"core":[80],"components:":[81],"(1)":[82],"Frame-wise":[83],"Text-guided":[84],"Modeling":[85],"(FTM),":[86],"which":[87,131],"uses":[88],"conditional":[89],"prompt":[90],"embed":[93],"frame-level":[95],"visual":[96,117],"features":[97],"into":[98],"the":[99,178],"space,":[101],"achieving":[102,165],"structural":[103],"consistency;":[104,124],"(2)":[105],"Dual-Modal":[106],"Metric":[107],"Learning":[108],"via":[109],"dual-path":[110],"Dynamic":[111],"Time":[112],"Warping":[113],"(Dual-DTW),":[114],"jointly":[115],"aligning":[116],"cross-modal":[119,188],"prototypes":[120],"ensure":[122],"(3)":[126],"Distribution":[127],"Consistency":[128],"Mapping":[129],"(DCM),":[130],"leverages":[132],"Maximum":[133],"Mean":[134],"Discrepancy":[135],"cosine":[137],"align":[140],"support-query":[141],"distributions":[142],"reinforce":[144],"representation":[145],"robustness.":[146],"Extensive":[147],"experiments":[148],"benchmark":[151],"datasets":[152],"show":[153],"consistently":[156],"outperforms":[157],"prior":[158],"methods.":[159],"surpasses":[161],"CLIP-FSAR":[162],"by":[163],"1.3%\u20132.7%,":[164],"89.7%":[166],"(1-shot)":[167],"Kinetics":[169],"99.12%":[171],"(5-shot)":[172],"UCF-101.":[174],"These":[175],"results":[176],"highlight":[177],"effectiveness":[179],"of":[180],"consistency-driven":[181],"for":[184],"robust":[185],"generalizable":[187],"few-shot":[189],"recognition.":[191]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2025-11-17T00:00:00"}
