{"id":"https://openalex.org/W4416214871","doi":"https://doi.org/10.1109/tmm.2025.3632652","title":"Progressive Learning of Instance-Level Proxy Semantics for Few-Shot Action Recognition","display_name":"Progressive Learning of Instance-Level Proxy Semantics for Few-Shot Action Recognition","publication_year":2025,"publication_date":"2025-11-14","ids":{"openalex":"https://openalex.org/W4416214871","doi":"https://doi.org/10.1109/tmm.2025.3632652"},"language":null,"primary_location":{"id":"doi:10.1109/tmm.2025.3632652","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tmm.2025.3632652","pdf_url":null,"source":{"id":"https://openalex.org/S137030581","display_name":"IEEE Transactions on Multimedia","issn_l":"1520-9210","issn":["1520-9210","1941-0077"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Multimedia","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5101801577","display_name":"Fang Peng","orcid":"https://orcid.org/0000-0002-3948-7413"},"institutions":[{"id":"https://openalex.org/I19820366","display_name":"Chinese Academy of Sciences","ror":"https://ror.org/034t30j35","country_code":"CN","type":"government","lineage":["https://openalex.org/I19820366"]},{"id":"https://openalex.org/I4210094879","display_name":"Shandong Institute of Automation","ror":"https://ror.org/00qdtba35","country_code":"CN","type":"facility","lineage":["https://openalex.org/I4210094879","https://openalex.org/I4210142748"]},{"id":"https://openalex.org/I4210112150","display_name":"Institute of Automation","ror":"https://ror.org/022c3hy66","country_code":"CN","type":"facility","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210112150"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Fang Peng","raw_affiliation_strings":["State Key Laboratory of Multimodal Artificial Intelligence Systems, Institute of Automation, Chinese Academy of Sciences, Beijing, China","State Key Laboratory of Multimodal Artificial Intelligence Systems (MAIS), Institute of Automation, Chinese Academy of Sciences, Beijing, China"],"affiliations":[{"raw_affiliation_string":"State Key Laboratory of Multimodal Artificial Intelligence Systems, Institute of Automation, Chinese Academy of Sciences, Beijing, China","institution_ids":["https://openalex.org/I4210112150","https://openalex.org/I19820366"]},{"raw_affiliation_string":"State Key Laboratory of Multimodal Artificial Intelligence Systems (MAIS), Institute of Automation, Chinese Academy of Sciences, Beijing, China","institution_ids":["https://openalex.org/I4210094879","https://openalex.org/I19820366"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5083991825","display_name":"Xiaoshan Yang","orcid":"https://orcid.org/0000-0001-5453-9755"},"institutions":[{"id":"https://openalex.org/I19820366","display_name":"Chinese Academy of Sciences","ror":"https://ror.org/034t30j35","country_code":"CN","type":"government","lineage":["https://openalex.org/I19820366"]},{"id":"https://openalex.org/I4210094879","display_name":"Shandong Institute of Automation","ror":"https://ror.org/00qdtba35","country_code":"CN","type":"facility","lineage":["https://openalex.org/I4210094879","https://openalex.org/I4210142748"]},{"id":"https://openalex.org/I4210112150","display_name":"Institute of Automation","ror":"https://ror.org/022c3hy66","country_code":"CN","type":"facility","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210112150"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xiaoshan Yang","raw_affiliation_strings":["State Key Laboratory of Multimodal Artificial Intelligence Systems, Institute of Automation, Chinese Academy of Sciences, Beijing, China","State Key Laboratory of Multimodal Artificial Intelligence Systems (MAIS), Institute of Automation, Chinese Academy of Sciences, Beijing, China"],"affiliations":[{"raw_affiliation_string":"State Key Laboratory of Multimodal Artificial Intelligence Systems, Institute of Automation, Chinese Academy of Sciences, Beijing, China","institution_ids":["https://openalex.org/I4210112150","https://openalex.org/I19820366"]},{"raw_affiliation_string":"State Key Laboratory of Multimodal Artificial Intelligence Systems (MAIS), Institute of Automation, Chinese Academy of Sciences, Beijing, China","institution_ids":["https://openalex.org/I4210094879","https://openalex.org/I19820366"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Yaowei Wang","orcid":"https://orcid.org/0000-0003-2197-9038"},"institutions":[{"id":"https://openalex.org/I4210100255","display_name":"Beijing Academy of Artificial Intelligence","ror":"https://ror.org/016a74861","country_code":"CN","type":"other","lineage":["https://openalex.org/I4210100255"]},{"id":"https://openalex.org/I4210136793","display_name":"Peng Cheng Laboratory","ror":"https://ror.org/03qdqbt06","country_code":"CN","type":"facility","lineage":["https://openalex.org/I4210136793"]},{"id":"https://openalex.org/I4210165038","display_name":"University of Chinese Academy of Sciences","ror":"https://ror.org/05qbk4x57","country_code":"CN","type":"education","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210165038"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yaowei Wang","raw_affiliation_strings":["Pengcheng Laboratory, Shenzhen, China","School of Artificial Intelligence, University of Chinese Academy of Sciences, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Pengcheng Laboratory, Shenzhen, China","institution_ids":["https://openalex.org/I4210136793"]},{"raw_affiliation_string":"School of Artificial Intelligence, University of Chinese Academy of Sciences, Beijing, China","institution_ids":["https://openalex.org/I4210100255","https://openalex.org/I4210165038"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5101552487","display_name":"Changsheng Xu","orcid":"https://orcid.org/0000-0001-8343-9665"},"institutions":[{"id":"https://openalex.org/I19820366","display_name":"Chinese Academy of Sciences","ror":"https://ror.org/034t30j35","country_code":"CN","type":"government","lineage":["https://openalex.org/I19820366"]},{"id":"https://openalex.org/I4210094879","display_name":"Shandong Institute of Automation","ror":"https://ror.org/00qdtba35","country_code":"CN","type":"facility","lineage":["https://openalex.org/I4210094879","https://openalex.org/I4210142748"]},{"id":"https://openalex.org/I4210112150","display_name":"Institute of Automation","ror":"https://ror.org/022c3hy66","country_code":"CN","type":"facility","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210112150"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Changsheng Xu","raw_affiliation_strings":["State Key Laboratory of Multimodal Artificial Intelligence Systems, Institute of Automation, Chinese Academy of Sciences, Beijing, China","State Key Laboratory of Multimodal Artificial Intelligence Systems (MAIS), Institute of Automation, Chinese Academy of Sciences, Beijing, China"],"affiliations":[{"raw_affiliation_string":"State Key Laboratory of Multimodal Artificial Intelligence Systems, Institute of Automation, Chinese Academy of Sciences, Beijing, China","institution_ids":["https://openalex.org/I4210112150","https://openalex.org/I19820366"]},{"raw_affiliation_string":"State Key Laboratory of Multimodal Artificial Intelligence Systems (MAIS), Institute of Automation, Chinese Academy of Sciences, Beijing, China","institution_ids":["https://openalex.org/I4210094879","https://openalex.org/I19820366"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5101801577"],"corresponding_institution_ids":["https://openalex.org/I19820366","https://openalex.org/I4210094879","https://openalex.org/I4210112150"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.34055108,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":"28","issue":null,"first_page":"853","last_page":"864"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10812","display_name":"Human Pose and Action Recognition","score":0.9538999795913696,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10812","display_name":"Human Pose and Action Recognition","score":0.9538999795913696,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.014000000432133675,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11307","display_name":"Domain Adaptation and Few-Shot Learning","score":0.008999999612569809,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/robustness","display_name":"Robustness (evolution)","score":0.5928999781608582},{"id":"https://openalex.org/keywords/semantics","display_name":"Semantics (computer science)","score":0.5374000072479248},{"id":"https://openalex.org/keywords/proxy","display_name":"Proxy (statistics)","score":0.4657999873161316},{"id":"https://openalex.org/keywords/limiting","display_name":"Limiting","score":0.41359999775886536},{"id":"https://openalex.org/keywords/generalization","display_name":"Generalization","score":0.33180001378059387},{"id":"https://openalex.org/keywords/action-recognition","display_name":"Action recognition","score":0.3167000114917755},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.30390000343322754}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8657000064849854},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6036999821662903},{"id":"https://openalex.org/C63479239","wikidata":"https://www.wikidata.org/wiki/Q7353546","display_name":"Robustness (evolution)","level":3,"score":0.5928999781608582},{"id":"https://openalex.org/C184337299","wikidata":"https://www.wikidata.org/wiki/Q1437428","display_name":"Semantics (computer science)","level":2,"score":0.5374000072479248},{"id":"https://openalex.org/C2780148112","wikidata":"https://www.wikidata.org/wiki/Q1432581","display_name":"Proxy (statistics)","level":2,"score":0.4657999873161316},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.43790000677108765},{"id":"https://openalex.org/C188198153","wikidata":"https://www.wikidata.org/wiki/Q1613840","display_name":"Limiting","level":2,"score":0.41359999775886536},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.34200000762939453},{"id":"https://openalex.org/C177148314","wikidata":"https://www.wikidata.org/wiki/Q170084","display_name":"Generalization","level":2,"score":0.33180001378059387},{"id":"https://openalex.org/C2987834672","wikidata":"https://www.wikidata.org/wiki/Q4677630","display_name":"Action recognition","level":3,"score":0.3167000114917755},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.30390000343322754},{"id":"https://openalex.org/C175154964","wikidata":"https://www.wikidata.org/wiki/Q380077","display_name":"Task analysis","level":3,"score":0.30149999260902405},{"id":"https://openalex.org/C133488467","wikidata":"https://www.wikidata.org/wiki/Q6673524","display_name":"Long short term memory","level":4,"score":0.27709999680519104},{"id":"https://openalex.org/C197914299","wikidata":"https://www.wikidata.org/wiki/Q18650","display_name":"Semantic memory","level":3,"score":0.2718000113964081},{"id":"https://openalex.org/C2780791683","wikidata":"https://www.wikidata.org/wiki/Q846785","display_name":"Action (physics)","level":2,"score":0.25850000977516174},{"id":"https://openalex.org/C108583219","wikidata":"https://www.wikidata.org/wiki/Q197536","display_name":"Deep learning","level":2,"score":0.2563999891281128},{"id":"https://openalex.org/C94124525","wikidata":"https://www.wikidata.org/wiki/Q912550","display_name":"Categorization","level":2,"score":0.2517000138759613}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/tmm.2025.3632652","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tmm.2025.3632652","pdf_url":null,"source":{"id":"https://openalex.org/S137030581","display_name":"IEEE Transactions on Multimedia","issn_l":"1520-9210","issn":["1520-9210","1941-0077"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Multimedia","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G1100260481","display_name":null,"funder_award_id":"62072455","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G1498893086","display_name":null,"funder_award_id":"62036012","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G2246614840","display_name":null,"funder_award_id":"2021ZD0112200","funder_id":"https://openalex.org/F4320329860","funder_display_name":"National Science and Technology Major Project"},{"id":"https://openalex.org/G3087610523","display_name":null,"funder_award_id":"U23A20387","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G6544960559","display_name":null,"funder_award_id":"62322212","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"},{"id":"https://openalex.org/F4320329860","display_name":"National Science and Technology Major Project","ror":null}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":46,"referenced_works":["https://openalex.org/W1901129140","https://openalex.org/W2108598243","https://openalex.org/W2126579184","https://openalex.org/W2302255633","https://openalex.org/W2625366777","https://openalex.org/W2894873912","https://openalex.org/W2927933146","https://openalex.org/W2963524571","https://openalex.org/W3016847592","https://openalex.org/W3034251792","https://openalex.org/W3035374961","https://openalex.org/W3095374178","https://openalex.org/W3173271747","https://openalex.org/W3192801537","https://openalex.org/W3202483439","https://openalex.org/W3203711169","https://openalex.org/W3207758636","https://openalex.org/W4240592325","https://openalex.org/W4304014690","https://openalex.org/W4312259618","https://openalex.org/W4312480274","https://openalex.org/W4312558481","https://openalex.org/W4312614039","https://openalex.org/W4312658081","https://openalex.org/W4312933868","https://openalex.org/W4312941859","https://openalex.org/W4313046672","https://openalex.org/W4376133174","https://openalex.org/W4387272106","https://openalex.org/W4387695265","https://openalex.org/W4387789891","https://openalex.org/W4388487066","https://openalex.org/W4388756695","https://openalex.org/W4389610328","https://openalex.org/W4390872297","https://openalex.org/W4390874610","https://openalex.org/W4391547560","https://openalex.org/W4393253100","https://openalex.org/W4394625734","https://openalex.org/W4396219542","https://openalex.org/W4396882767","https://openalex.org/W4399418349","https://openalex.org/W4402044247","https://openalex.org/W4402813010","https://openalex.org/W4403727066","https://openalex.org/W4411150224"],"related_works":[],"abstract_inverted_index":{"Few-shot":[0],"action":[1,135],"recognition":[2],"is":[3],"a":[4,76,108,113],"crucial":[5],"task":[6],"for":[7],"mitigating":[8],"the":[9,25,121,129,144],"challenges":[10],"of":[11,27,82,123,133,146],"data":[12],"scarcity":[13],"in":[14,19,60],"video":[15],"understanding.":[16],"Recent":[17],"advancements":[18],"large-scale":[20],"pre-trained":[21,33],"models":[22],"have":[23,45],"introduced":[24],"potential":[26],"incorporating":[28],"semantic":[29,98,102],"knowledge":[30],"from":[31],"multi-modal":[32],"models,":[34],"such":[35],"as":[36],"CLIP,":[37],"to":[38,65,67,93],"alleviate":[39],"these":[40],"challenges.":[41],"Although":[42],"some":[43],"progress":[44],"been":[46],"made,":[47],"existing":[48],"methods":[49],"still":[50],"rely":[51],"on":[52,139],"class-level":[53],"text":[54],"embeddings":[55],"that":[56],"are":[57],"inherently":[58],"low":[59],"diversity,":[61],"limiting":[62],"their":[63],"ability":[64],"generalize":[66],"unseen":[68],"actions.":[69],"To":[70],"overcome":[71],"this":[72],"limitation,":[73],"we":[74],"propose":[75],"novel":[77],"framework":[78],"called":[79],"Progressive":[80],"Learning":[81],"Instance-Level":[83],"Proxy":[84,89],"Semantics":[85],"(ProLIPS).":[86],"ProLIPS":[87],"integrates":[88],"Semantic":[90],"Diffusion":[91],"(PSD)":[92],"generate":[94],"rich,":[95],"instance-level":[96],"proxy":[97],"features":[99],"with":[100],"diverse":[101],"contents":[103],"and":[104,112,131],"temporal":[105],"dynamics,":[106],"utilizing":[107],"multi-step":[109],"CLIP-guidance":[110],"mechanism":[111],"time-conditioned":[114],"reverse":[115],"diffusion":[116],"process.":[117],"Our":[118],"approach":[119],"preserves":[120],"diversity":[122],"semantic-aligned":[124],"visual":[125],"features,":[126],"significantly":[127],"improving":[128],"generalization":[130],"robustness":[132],"few-shot":[134],"recognition.":[136],"Extensive":[137],"experiments":[138],"five":[140],"challenging":[141],"benchmarks":[142],"demonstrate":[143],"effectiveness":[145],"ProLIPS.":[147]},"counts_by_year":[],"updated_date":"2026-04-09T08:11:56.329763","created_date":"2025-11-14T00:00:00"}
