{"id":"https://openalex.org/W4399418349","doi":"https://doi.org/10.1145/3652583.3658044","title":"Multimodal Prototype-Enhanced Network for Few-Shot Action Recognition","display_name":"Multimodal Prototype-Enhanced Network for Few-Shot Action Recognition","publication_year":2024,"publication_date":"2024-05-30","ids":{"openalex":"https://openalex.org/W4399418349","doi":"https://doi.org/10.1145/3652583.3658044"},"language":"en","primary_location":{"id":"doi:10.1145/3652583.3658044","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3652583.3658044","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3652583.3658044","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2024 International Conference on Multimedia Retrieval","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://dl.acm.org/doi/pdf/10.1145/3652583.3658044","any_repository_has_fulltext":null},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5072696977","display_name":"Xinzhe Ni","orcid":"https://orcid.org/0000-0001-7845-7762"},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Xinzhe Ni","raw_affiliation_strings":["Tsinghua University, Shenzhen, China"],"raw_orcid":"https://orcid.org/0000-0001-7845-7762","affiliations":[{"raw_affiliation_string":"Tsinghua University, Shenzhen, China","institution_ids":["https://openalex.org/I99065089"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101685090","display_name":"Yong Liu","orcid":"https://orcid.org/0009-0000-3078-1598"},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yong Liu","raw_affiliation_strings":["Tsinghua University, Shenzhen, China"],"raw_orcid":"https://orcid.org/0009-0000-3078-1598","affiliations":[{"raw_affiliation_string":"Tsinghua University, Shenzhen, China","institution_ids":["https://openalex.org/I99065089"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100771563","display_name":"Hao Wen","orcid":"https://orcid.org/0000-0003-3165-3859"},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Hao Wen","raw_affiliation_strings":["Tsinghua University, ShenZhen, China"],"raw_orcid":"https://orcid.org/0000-0003-3165-3859","affiliations":[{"raw_affiliation_string":"Tsinghua University, ShenZhen, China","institution_ids":["https://openalex.org/I99065089"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5015360017","display_name":"Yatai Ji","orcid":"https://orcid.org/0009-0000-8096-2928"},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yatai Ji","raw_affiliation_strings":["Tsinghua University, Shenzhen, China"],"raw_orcid":"https://orcid.org/0009-0000-8096-2928","affiliations":[{"raw_affiliation_string":"Tsinghua University, Shenzhen, China","institution_ids":["https://openalex.org/I99065089"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5016038454","display_name":"Jing Xiao","orcid":"https://orcid.org/0000-0001-9615-4749"},"institutions":[{"id":"https://openalex.org/I4401726822","display_name":"Ping An (China)","ror":"https://ror.org/004yv2z91","country_code":null,"type":"company","lineage":["https://openalex.org/I4401726822"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jing Xiao","raw_affiliation_strings":["Ping An Insurance (Group) Company of China, Shenzhen, China"],"raw_orcid":"https://orcid.org/0000-0001-9615-4749","affiliations":[{"raw_affiliation_string":"Ping An Insurance (Group) Company of China, Shenzhen, China","institution_ids":["https://openalex.org/I4401726822"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5020953714","display_name":"Yujiu Yang","orcid":"https://orcid.org/0000-0002-6427-1024"},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yujiu Yang","raw_affiliation_strings":["Tsinghua University, Shenzhen, China"],"raw_orcid":"https://orcid.org/0000-0002-6427-1024","affiliations":[{"raw_affiliation_string":"Tsinghua University, Shenzhen, China","institution_ids":["https://openalex.org/I99065089"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5072696977"],"corresponding_institution_ids":["https://openalex.org/I99065089"],"apc_list":null,"apc_paid":null,"fwci":2.8569,"has_fulltext":false,"cited_by_count":12,"citation_normalized_percentile":{"value":0.91890869,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":98,"max":99},"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"10"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10812","display_name":"Human Pose and Action Recognition","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10812","display_name":"Human Pose and Action Recognition","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9855999946594238,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12740","display_name":"Gait Recognition and Analysis","score":0.9807000160217285,"subfield":{"id":"https://openalex.org/subfields/2204","display_name":"Biomedical Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8236311674118042},{"id":"https://openalex.org/keywords/encoder","display_name":"Encoder","score":0.6283434629440308},{"id":"https://openalex.org/keywords/initialization","display_name":"Initialization","score":0.5912586450576782},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5682955384254456},{"id":"https://openalex.org/keywords/metric","display_name":"Metric (unit)","score":0.5053918361663818},{"id":"https://openalex.org/keywords/multimodal-interaction","display_name":"Multimodal interaction","score":0.4187103807926178},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.36469778418540955},{"id":"https://openalex.org/keywords/human\u2013computer-interaction","display_name":"Human\u2013computer interaction","score":0.2391606867313385}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8236311674118042},{"id":"https://openalex.org/C118505674","wikidata":"https://www.wikidata.org/wiki/Q42586063","display_name":"Encoder","level":2,"score":0.6283434629440308},{"id":"https://openalex.org/C114466953","wikidata":"https://www.wikidata.org/wiki/Q6034165","display_name":"Initialization","level":2,"score":0.5912586450576782},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5682955384254456},{"id":"https://openalex.org/C176217482","wikidata":"https://www.wikidata.org/wiki/Q860554","display_name":"Metric (unit)","level":2,"score":0.5053918361663818},{"id":"https://openalex.org/C135641252","wikidata":"https://www.wikidata.org/wiki/Q738567","display_name":"Multimodal interaction","level":2,"score":0.4187103807926178},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.36469778418540955},{"id":"https://openalex.org/C107457646","wikidata":"https://www.wikidata.org/wiki/Q207434","display_name":"Human\u2013computer interaction","level":1,"score":0.2391606867313385},{"id":"https://openalex.org/C21547014","wikidata":"https://www.wikidata.org/wiki/Q1423657","display_name":"Operations management","level":1,"score":0.0},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3652583.3658044","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3652583.3658044","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3652583.3658044","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2024 International Conference on Multimedia Retrieval","raw_type":"proceedings-article"}],"best_oa_location":{"id":"doi:10.1145/3652583.3658044","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3652583.3658044","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3652583.3658044","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2024 International Conference on Multimedia Retrieval","raw_type":"proceedings-article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4399418349.pdf"},"referenced_works_count":44,"referenced_works":["https://openalex.org/W398859631","https://openalex.org/W569478347","https://openalex.org/W2126579184","https://openalex.org/W2187089797","https://openalex.org/W2194775991","https://openalex.org/W2507009361","https://openalex.org/W2560662850","https://openalex.org/W2593390416","https://openalex.org/W2625366777","https://openalex.org/W2894846593","https://openalex.org/W2894873912","https://openalex.org/W2894906112","https://openalex.org/W2943605315","https://openalex.org/W2949879676","https://openalex.org/W2962799101","https://openalex.org/W2962805158","https://openalex.org/W2963070905","https://openalex.org/W2963524571","https://openalex.org/W2963943197","https://openalex.org/W2964105864","https://openalex.org/W2978329087","https://openalex.org/W2994785205","https://openalex.org/W3034312118","https://openalex.org/W3034637015","https://openalex.org/W3035374961","https://openalex.org/W3045564095","https://openalex.org/W3092600962","https://openalex.org/W3093455342","https://openalex.org/W3095374178","https://openalex.org/W3096831136","https://openalex.org/W3099486271","https://openalex.org/W3120387732","https://openalex.org/W3173271747","https://openalex.org/W3182874523","https://openalex.org/W3192801537","https://openalex.org/W4226058394","https://openalex.org/W4312259618","https://openalex.org/W4312733400","https://openalex.org/W4312959318","https://openalex.org/W4313046672","https://openalex.org/W4386065787","https://openalex.org/W4390872437","https://openalex.org/W6600577311","https://openalex.org/W6699757912"],"related_works":["https://openalex.org/W3204184292","https://openalex.org/W3176564347","https://openalex.org/W1985458517","https://openalex.org/W2355833770","https://openalex.org/W3031039437","https://openalex.org/W183202219","https://openalex.org/W3095877357","https://openalex.org/W2072565696","https://openalex.org/W2050451745","https://openalex.org/W2378903222"],"abstract_inverted_index":{"Current":[0],"methods":[1],"for":[2],"few-shot":[3,162],"action":[4,163],"recognition":[5,164],"mainly":[6],"fall":[7],"into":[8,179],"the":[9,17,27,50,83,96,136,148,180,183],"metric":[10],"learning":[11],"framework":[12],"following":[13],"ProtoNet,":[14],"which":[15,48,140],"demonstrates":[16],"importance":[18],"of":[19,29,53,138,153],"prototypes.":[20,61,112],"Although":[21],"they":[22],"achieve":[23],"relatively":[24],"good":[25,78],"performance,":[26],"effect":[28],"multimodal":[30,57,79,115,122],"information":[31,52,58],"is":[32,141],"ignored,":[33],"e.g.":[34],"label":[35,54],"texts.":[36],"In":[37,95],"this":[38],"work,":[39],"we":[40,127],"propose":[41],"a":[42,67,91,99,121,129],"novel":[43],"MultimOdal":[44],"PRototype-ENhanced":[45],"Network":[46],"(MORN),":[47],"uses":[49],"semantic":[51],"texts":[55],"as":[56],"to":[59,74,109,134,143],"enhance":[60],"A":[62],"CLIP":[63,69],"visual":[64,84,86,92],"encoder":[65,71],"and":[66,103,151,169,171],"frozen":[68],"text":[70,97,111],"are":[72,88,107,117],"introduced":[73],"obtain":[75,110],"features":[76],"with":[77],"initialization.":[80],"Then":[81],"in":[82],"flow,":[85,98],"prototypes":[87,116],"computed":[89,119],"by":[90,120],"prototype-computed":[93],"module.":[94,125],"semantic-enhanced":[100],"(SE)":[101],"module":[102],"an":[104],"inflating":[105],"operation":[106],"used":[108,142],"The":[113],"final":[114],"then":[118],"prototype-enhanced":[123],"(MPE)":[124],"Besides,":[126],"define":[128],"PRototype":[130],"SImilarity":[131],"DiffErence":[132],"(PRIDE)":[133],"evaluate":[135],"quality":[137],"prototypes,":[139],"verify":[144],"our":[145],"improvement":[146],"on":[147,159],"prototype":[149],"level":[150],"effectiveness":[152],"MORN.":[154],"We":[155],"conduct":[156],"extensive":[157],"experiments":[158],"four":[160],"popular":[161],"datasets:":[165],"HMDB51,":[166],"UCF101,":[167],"Kinetics":[168],"SSv2,":[170],"MORN":[172],"achieves":[173],"state-of-the-art":[174],"results.":[175],"When":[176],"plugging":[177],"PRIDE":[178],"training":[181],"stage,":[182],"performance":[184],"can":[185],"be":[186],"further":[187],"improved.":[188]},"counts_by_year":[{"year":2026,"cited_by_count":3},{"year":2025,"cited_by_count":9}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
