{"id":"https://openalex.org/W4391383802","doi":"https://doi.org/10.1145/3627631.3627637","title":"ViLP: Knowledge Exploration using Vision, Language, and Pose Embeddings for Video Action Recognition","display_name":"ViLP: Knowledge Exploration using Vision, Language, and Pose Embeddings for Video Action Recognition","publication_year":2023,"publication_date":"2023-12-15","ids":{"openalex":"https://openalex.org/W4391383802","doi":"https://doi.org/10.1145/3627631.3627637"},"language":"en","primary_location":{"id":"doi:10.1145/3627631.3627637","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3627631.3627637","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the Fourteenth Indian Conference on Computer Vision, Graphics and Image Processing","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5101743181","display_name":"Soumyabrata Chaudhuri","orcid":"https://orcid.org/0009-0006-1797-5121"},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Soumyabrata Chaudhuri","raw_affiliation_strings":["IIT Bhubaneswar, IN"],"affiliations":[{"raw_affiliation_string":"IIT Bhubaneswar, IN","institution_ids":[]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5079312227","display_name":"Saumik Bhattacharya","orcid":"https://orcid.org/0000-0003-1273-7969"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Saumik Bhattacharya","raw_affiliation_strings":["E&amp;ECE, Indian Institute of Technology Kharagpur, IN"],"affiliations":[{"raw_affiliation_string":"E&amp;ECE, Indian Institute of Technology Kharagpur, IN","institution_ids":[]}]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5101743181"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.8364,"has_fulltext":false,"cited_by_count":7,"citation_normalized_percentile":{"value":0.76169346,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":98},"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"7"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10812","display_name":"Human Pose and Action Recognition","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10812","display_name":"Human Pose and Action Recognition","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.998199999332428,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11512","display_name":"Anomaly Detection Techniques and Applications","score":0.9948999881744385,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7628461122512817},{"id":"https://openalex.org/keywords/action-recognition","display_name":"Action recognition","score":0.7300252914428711},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.6374855637550354},{"id":"https://openalex.org/keywords/computer-vision","display_name":"Computer vision","score":0.5838358402252197},{"id":"https://openalex.org/keywords/action","display_name":"Action (physics)","score":0.5392808318138123}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7628461122512817},{"id":"https://openalex.org/C2987834672","wikidata":"https://www.wikidata.org/wiki/Q4677630","display_name":"Action recognition","level":3,"score":0.7300252914428711},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6374855637550354},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.5838358402252197},{"id":"https://openalex.org/C2780791683","wikidata":"https://www.wikidata.org/wiki/Q846785","display_name":"Action (physics)","level":2,"score":0.5392808318138123},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C2777212361","wikidata":"https://www.wikidata.org/wiki/Q5127848","display_name":"Class (philosophy)","level":2,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3627631.3627637","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3627631.3627637","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the Fourteenth Indian Conference on Computer Vision, Graphics and Image Processing","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G7467836518","display_name":null,"funder_award_id":"SRG/2021/001417","funder_id":"https://openalex.org/F4320323817","funder_display_name":"Universitas Brawijaya"}],"funders":[{"id":"https://openalex.org/F4320323817","display_name":"Universitas Brawijaya","ror":"https://ror.org/01wk3d929"}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":40,"referenced_works":["https://openalex.org/W28988658","https://openalex.org/W1944615693","https://openalex.org/W1983364832","https://openalex.org/W2079447555","https://openalex.org/W2105101328","https://openalex.org/W2113325037","https://openalex.org/W2124688298","https://openalex.org/W2126579184","https://openalex.org/W2156135524","https://openalex.org/W2165715280","https://openalex.org/W2217325140","https://openalex.org/W2307770531","https://openalex.org/W2462996230","https://openalex.org/W2507009361","https://openalex.org/W2559085405","https://openalex.org/W2746726611","https://openalex.org/W2950568498","https://openalex.org/W2963155035","https://openalex.org/W2963282966","https://openalex.org/W2963402313","https://openalex.org/W2963820951","https://openalex.org/W2990503944","https://openalex.org/W2996901793","https://openalex.org/W3035303837","https://openalex.org/W3133696297","https://openalex.org/W3135367836","https://openalex.org/W3138516171","https://openalex.org/W4214612132","https://openalex.org/W4214614183","https://openalex.org/W4214727094","https://openalex.org/W4225414521","https://openalex.org/W4285606530","https://openalex.org/W4312480274","https://openalex.org/W4312560592","https://openalex.org/W4312781038","https://openalex.org/W4382467086","https://openalex.org/W4386065852","https://openalex.org/W4386072365","https://openalex.org/W6778883912","https://openalex.org/W6790690058"],"related_works":["https://openalex.org/W2755342338","https://openalex.org/W2058170566","https://openalex.org/W2036807459","https://openalex.org/W2775347418","https://openalex.org/W1969923398","https://openalex.org/W2166024367","https://openalex.org/W2772917594","https://openalex.org/W3116076068","https://openalex.org/W2229312674","https://openalex.org/W2079911747"],"abstract_inverted_index":{"Video":[0],"Action":[1],"Recognition":[2],"(VAR)":[3],"is":[4,34],"a":[5,23,28,36],"challenging":[6,37],"task":[7],"due":[8],"to":[9,26,102],"its":[10],"inherent":[11],"complexities.":[12],"Though":[13],"different":[14],"approaches":[15],"have":[16,99],"been":[17,59,90,100],"explored":[18,91],"in":[19,47,67,76,105],"the":[20,70,79,115],"literature,":[21,51],"designing":[22],"unified":[24],"framework":[25],"recognize":[27],"large":[29],"number":[30],"of":[31,81,130,156],"human":[32,137],"actions":[33],"still":[35],"problem.":[38],"Recently,":[39],"Multi-Modal":[40],"Learning":[41],"(MML)":[42],"has":[43,57,88],"demonstrated":[44],"promising":[45],"results":[46],"this":[48,62,111],"domain.":[49],"In":[50,110],"2D":[52],"skeleton":[53],"or":[54,66],"pose":[55,96,117],"modality":[56],"often":[58],"used":[60],"for":[61,122],"task,":[63],"either":[64],"independently":[65,98],"conjunction":[68],"with":[69],"visual":[71,83],"information":[72],"(RGB":[73],"modality)":[74],"present":[75,114],"videos.":[77],"However,":[78],"combination":[80],"pose,":[82],"information,":[84],"and":[85,95,132,144,153,158],"text":[86,94],"attributes":[87,97],"not":[89],"yet,":[92],"though":[93],"proven":[101],"be":[103],"effective":[104],"numerous":[106],"computer":[107],"vision":[108],"tasks.":[109],"paper,":[112],"we":[113],"first":[116],"augmented":[118],"Vision-language":[119],"model":[120],"(VLM)":[121],"VAR.":[123],"Notably,":[124],"our":[125],"scheme":[126],"achieves":[127],"an":[128,154],"accuracy":[129,155],"92.81%":[131],"73.02%":[133],"on":[134],"two":[135],"popular":[136],"video":[138,150],"action":[139],"recognition":[140],"benchmark":[141],"datasets,":[142],"UCF-101":[143],"HMDB-51,":[145],"respectively,":[146],"even":[147],"without":[148],"any":[149],"data":[151],"pre-training,":[152],"96.11%":[157],"75.75%":[159],"after":[160],"kinetics":[161],"pre-training.":[162]},"counts_by_year":[{"year":2025,"cited_by_count":4},{"year":2024,"cited_by_count":2},{"year":2023,"cited_by_count":1}],"updated_date":"2026-04-09T08:11:56.329763","created_date":"2025-10-10T00:00:00"}
