{"id":"https://openalex.org/W4391454481","doi":"https://doi.org/10.1109/tmm.2024.3361157","title":"Commonsense Knowledge Prompting for Few-Shot Action Recognition in Videos","display_name":"Commonsense Knowledge Prompting for Few-Shot Action Recognition in Videos","publication_year":2024,"publication_date":"2024-01-01","ids":{"openalex":"https://openalex.org/W4391454481","doi":"https://doi.org/10.1109/tmm.2024.3361157"},"language":"en","primary_location":{"id":"doi:10.1109/tmm.2024.3361157","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tmm.2024.3361157","pdf_url":null,"source":{"id":"https://openalex.org/S137030581","display_name":"IEEE Transactions on Multimedia","issn_l":"1520-9210","issn":["1520-9210","1941-0077"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Multimedia","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5101078991","display_name":"Yuheng Shi","orcid":"https://orcid.org/0009-0003-7573-4279"},"institutions":[{"id":"https://openalex.org/I125839683","display_name":"Beijing Institute of Technology","ror":"https://ror.org/01skt4w74","country_code":"CN","type":"education","lineage":["https://openalex.org/I125839683","https://openalex.org/I890469752"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Yuheng Shi","raw_affiliation_strings":["Beijing Laboratory of Intelligent Information Technology, School of Computer Science, Beijing Institute of Technology, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Beijing Laboratory of Intelligent Information Technology, School of Computer Science, Beijing Institute of Technology, Beijing, China","institution_ids":["https://openalex.org/I125839683"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5011070646","display_name":"Xinxiao Wu","orcid":"https://orcid.org/0000-0002-2056-6947"},"institutions":[{"id":"https://openalex.org/I125839683","display_name":"Beijing Institute of Technology","ror":"https://ror.org/01skt4w74","country_code":"CN","type":"education","lineage":["https://openalex.org/I125839683","https://openalex.org/I890469752"]},{"id":"https://openalex.org/I4388482657","display_name":"Shenzhen MSU-BIT University","ror":"https://ror.org/02q963474","country_code":null,"type":"education","lineage":["https://openalex.org/I4388482657"]},{"id":"https://openalex.org/I180726961","display_name":"Shenzhen University","ror":"https://ror.org/01vy4gh70","country_code":"CN","type":"education","lineage":["https://openalex.org/I180726961"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xinxiao Wu","raw_affiliation_strings":["Beijing Laboratory of Intelligent Information Technology, School of Computer Science, Beijing Institute of Technology, Beijing, China","Guangdong Provincial Laboratory of Machine Perception and Intelligent Computing, Shenzhen MSU-BIT University, Shenzhen, China"],"affiliations":[{"raw_affiliation_string":"Beijing Laboratory of Intelligent Information Technology, School of Computer Science, Beijing Institute of Technology, Beijing, China","institution_ids":["https://openalex.org/I125839683"]},{"raw_affiliation_string":"Guangdong Provincial Laboratory of Machine Perception and Intelligent Computing, Shenzhen MSU-BIT University, Shenzhen, China","institution_ids":["https://openalex.org/I180726961","https://openalex.org/I4388482657"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5052934693","display_name":"Hanxi Lin","orcid":"https://orcid.org/0009-0007-4734-9639"},"institutions":[{"id":"https://openalex.org/I125839683","display_name":"Beijing Institute of Technology","ror":"https://ror.org/01skt4w74","country_code":"CN","type":"education","lineage":["https://openalex.org/I125839683","https://openalex.org/I890469752"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Hanxi Lin","raw_affiliation_strings":["Beijing Laboratory of Intelligent Information Technology, School of Computer Science, Beijing Institute of Technology, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Beijing Laboratory of Intelligent Information Technology, School of Computer Science, Beijing Institute of Technology, Beijing, China","institution_ids":["https://openalex.org/I125839683"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5055469774","display_name":"Jiebo Luo","orcid":"https://orcid.org/0000-0002-4516-9729"},"institutions":[{"id":"https://openalex.org/I5388228","display_name":"University of Rochester","ror":"https://ror.org/022kthw22","country_code":"US","type":"education","lineage":["https://openalex.org/I5388228"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Jiebo Luo","raw_affiliation_strings":["Department of Computer Science, University of Rochester, Rochester, NY, USA"],"affiliations":[{"raw_affiliation_string":"Department of Computer Science, University of Rochester, Rochester, NY, USA","institution_ids":["https://openalex.org/I5388228"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5101078991"],"corresponding_institution_ids":["https://openalex.org/I125839683"],"apc_list":null,"apc_paid":null,"fwci":2.5998,"has_fulltext":false,"cited_by_count":10,"citation_normalized_percentile":{"value":0.90300338,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":90,"max":99},"biblio":{"volume":"26","issue":null,"first_page":"8395","last_page":"8405"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10812","display_name":"Human Pose and Action Recognition","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10812","display_name":"Human Pose and Action Recognition","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9993000030517578,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11398","display_name":"Hand Gesture Recognition Systems","score":0.9782999753952026,"subfield":{"id":"https://openalex.org/subfields/1709","display_name":"Human-Computer Interaction"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8683818578720093},{"id":"https://openalex.org/keywords/commonsense-knowledge","display_name":"Commonsense knowledge","score":0.7153897881507874},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.6220775842666626},{"id":"https://openalex.org/keywords/action","display_name":"Action (physics)","score":0.5840957760810852},{"id":"https://openalex.org/keywords/semantics","display_name":"Semantics (computer science)","score":0.566768229007721},{"id":"https://openalex.org/keywords/sentence","display_name":"Sentence","score":0.5616893768310547},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.5597243905067444},{"id":"https://openalex.org/keywords/knowledge-base","display_name":"Knowledge base","score":0.5248548984527588},{"id":"https://openalex.org/keywords/benchmark","display_name":"Benchmark (surveying)","score":0.5083968043327332},{"id":"https://openalex.org/keywords/language-model","display_name":"Language model","score":0.47601157426834106},{"id":"https://openalex.org/keywords/frame","display_name":"Frame (networking)","score":0.46169814467430115},{"id":"https://openalex.org/keywords/generalization","display_name":"Generalization","score":0.4388432204723358},{"id":"https://openalex.org/keywords/parsing","display_name":"Parsing","score":0.4262721538543701},{"id":"https://openalex.org/keywords/code","display_name":"Code (set theory)","score":0.4219074249267578},{"id":"https://openalex.org/keywords/matching","display_name":"Matching (statistics)","score":0.42140883207321167},{"id":"https://openalex.org/keywords/programming-language","display_name":"Programming language","score":0.12688082456588745}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8683818578720093},{"id":"https://openalex.org/C30542707","wikidata":"https://www.wikidata.org/wiki/Q1603203","display_name":"Commonsense knowledge","level":3,"score":0.7153897881507874},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6220775842666626},{"id":"https://openalex.org/C2780791683","wikidata":"https://www.wikidata.org/wiki/Q846785","display_name":"Action (physics)","level":2,"score":0.5840957760810852},{"id":"https://openalex.org/C184337299","wikidata":"https://www.wikidata.org/wiki/Q1437428","display_name":"Semantics (computer science)","level":2,"score":0.566768229007721},{"id":"https://openalex.org/C2777530160","wikidata":"https://www.wikidata.org/wiki/Q41796","display_name":"Sentence","level":2,"score":0.5616893768310547},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.5597243905067444},{"id":"https://openalex.org/C4554734","wikidata":"https://www.wikidata.org/wiki/Q593744","display_name":"Knowledge base","level":2,"score":0.5248548984527588},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.5083968043327332},{"id":"https://openalex.org/C137293760","wikidata":"https://www.wikidata.org/wiki/Q3621696","display_name":"Language model","level":2,"score":0.47601157426834106},{"id":"https://openalex.org/C126042441","wikidata":"https://www.wikidata.org/wiki/Q1324888","display_name":"Frame (networking)","level":2,"score":0.46169814467430115},{"id":"https://openalex.org/C177148314","wikidata":"https://www.wikidata.org/wiki/Q170084","display_name":"Generalization","level":2,"score":0.4388432204723358},{"id":"https://openalex.org/C186644900","wikidata":"https://www.wikidata.org/wiki/Q194152","display_name":"Parsing","level":2,"score":0.4262721538543701},{"id":"https://openalex.org/C2776760102","wikidata":"https://www.wikidata.org/wiki/Q5139990","display_name":"Code (set theory)","level":3,"score":0.4219074249267578},{"id":"https://openalex.org/C165064840","wikidata":"https://www.wikidata.org/wiki/Q1321061","display_name":"Matching (statistics)","level":2,"score":0.42140883207321167},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.12688082456588745},{"id":"https://openalex.org/C76155785","wikidata":"https://www.wikidata.org/wiki/Q418","display_name":"Telecommunications","level":1,"score":0.0},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C205649164","wikidata":"https://www.wikidata.org/wiki/Q1071","display_name":"Geography","level":0,"score":0.0},{"id":"https://openalex.org/C13280743","wikidata":"https://www.wikidata.org/wiki/Q131089","display_name":"Geodesy","level":1,"score":0.0},{"id":"https://openalex.org/C105795698","wikidata":"https://www.wikidata.org/wiki/Q12483","display_name":"Statistics","level":1,"score":0.0},{"id":"https://openalex.org/C134306372","wikidata":"https://www.wikidata.org/wiki/Q7754","display_name":"Mathematical analysis","level":1,"score":0.0},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/tmm.2024.3361157","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tmm.2024.3361157","pdf_url":null,"source":{"id":"https://openalex.org/S137030581","display_name":"IEEE Transactions on Multimedia","issn_l":"1520-9210","issn":["1520-9210","1941-0077"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Multimedia","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[{"score":0.7300000190734863,"display_name":"Quality Education","id":"https://metadata.un.org/sdg/4"}],"awards":[{"id":"https://openalex.org/G816247876","display_name":null,"funder_award_id":"62072041","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":43,"referenced_works":["https://openalex.org/W24089286","https://openalex.org/W1623072288","https://openalex.org/W1980287119","https://openalex.org/W2126579184","https://openalex.org/W2187089797","https://openalex.org/W2277195237","https://openalex.org/W2625366777","https://openalex.org/W2887057599","https://openalex.org/W2894873912","https://openalex.org/W2895243423","https://openalex.org/W2896457183","https://openalex.org/W2926636120","https://openalex.org/W2963315828","https://openalex.org/W2963524571","https://openalex.org/W3035047011","https://openalex.org/W3035180180","https://openalex.org/W3035374961","https://openalex.org/W3041485444","https://openalex.org/W3094502228","https://openalex.org/W3095374178","https://openalex.org/W3153427360","https://openalex.org/W3173271747","https://openalex.org/W3184234949","https://openalex.org/W3198377975","https://openalex.org/W3200114289","https://openalex.org/W3200749679","https://openalex.org/W4283819124","https://openalex.org/W4286242385","https://openalex.org/W4312259618","https://openalex.org/W4312599396","https://openalex.org/W4312959318","https://openalex.org/W4313046672","https://openalex.org/W4386065852","https://openalex.org/W4390872570","https://openalex.org/W6600983433","https://openalex.org/W6631190155","https://openalex.org/W6766578407","https://openalex.org/W6789909235","https://openalex.org/W6790019176","https://openalex.org/W6791353385","https://openalex.org/W6797716411","https://openalex.org/W6801567822","https://openalex.org/W6803326850"],"related_works":["https://openalex.org/W579810227","https://openalex.org/W3035583586","https://openalex.org/W2952780262","https://openalex.org/W2375873920","https://openalex.org/W2979495269","https://openalex.org/W3165136392","https://openalex.org/W2392917763","https://openalex.org/W2221077171","https://openalex.org/W2083429127","https://openalex.org/W1979978247"],"abstract_inverted_index":{"Few-shot":[0],"action":[1,76,141,160],"recognition":[2],"in":[3,88],"videos":[4],"is":[5,84,191],"challenging":[6],"as":[7,70,140],"the":[8,129,135,156,180,187],"lack":[9],"of":[10,40,64,67,81,105,128,159,186],"supervision":[11],"makes":[12],"it":[13],"extremely":[14],"difficult":[15],"to":[16,19,45,73,115,124,154,184],"generalize":[17],"well":[18],"unseen":[20],"actions.":[21],"To":[22,55],"address":[23],"this":[24],"challenge,":[25],"we":[26,58,110,147],"propose":[27],"a":[28,47,61,89,116,149],"simple":[29],"yet":[30],"effective":[31],"method,":[32],"called":[33],"knowledge":[34,39,77],"prompting,":[35],"which":[36],"leverages":[37],"commonsense":[38],"actions":[41],"from":[42,103],"external":[43,95],"resources":[44],"prompt-tune":[46],"powerful":[48],"pre-trained":[49,117],"vision-language":[50,118],"model":[51,119],"for":[52,131,162],"few-shot":[53],"classification.":[54,163],"that":[56,171],"end,":[57],"first":[59],"collect":[60],"large-scale":[62],"corpus":[63,97],"language":[65],"descriptions":[66],"actions,":[68],"defined":[69],"text":[71,82,113],"proposals,":[72],"build":[74],"an":[75,94],"base.":[78],"The":[79],"collection":[80],"proposals":[83,114,130],"done":[85],"by":[86,99],"filling":[87],"handcraft":[90],"sentence":[91],"template":[92],"with":[93,121,143],"action-related":[96,101],"or":[98],"extracting":[100],"phrases":[102],"captions":[104],"Web":[106],"instruction":[107],"videos.":[108],"Next,":[109],"feed":[111],"these":[112],"along":[120],"video":[122],"frames":[123],"generate":[125],"matching":[126],"scores":[127,136],"each":[132],"frame,":[133],"and":[134],"can":[137],"be":[138],"treated":[139],"semantics":[142,161],"strong":[144],"generalization.":[145],"Finally,":[146],"design":[148],"lightweight":[150],"temporal":[151,157],"modeling":[152],"network":[153],"capture":[155],"evolution":[158],"Extensive":[164],"experiments":[165],"on":[166],"six":[167],"benchmark":[168],"datasets":[169],"demonstrate":[170],"our":[172],"method":[173],"generally":[174],"achieves":[175],"state-of-the-art":[176],"performance":[177],"while":[178],"reducing":[179],"training":[181],"computational":[182],"cost":[183],"0.1%":[185],"existing":[188],"methods.":[189],"Code":[190],"available":[192],"at":[193],"<uri":[194],"xmlns:mml=\"http://www.w3.org/1998/Math/MathML\"":[195],"xmlns:xlink=\"http://www.w3.org/1999/xlink\">https://github.com/OldStone0124/Knowledge-Prompting-for-FSAR.</uri>":[196]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":8},{"year":2024,"cited_by_count":1}],"updated_date":"2026-03-09T08:58:05.943551","created_date":"2025-10-10T00:00:00"}
