{"id":"https://openalex.org/W4416261102","doi":"https://doi.org/10.1109/iccv51701.2025.01035","title":"Frequency-Semantic Enhanced Variational Autoencoder for Zero-Shot Skeleton-Based Action Recognition","display_name":"Frequency-Semantic Enhanced Variational Autoencoder for Zero-Shot Skeleton-Based Action Recognition","publication_year":2025,"publication_date":"2025-10-19","ids":{"openalex":"https://openalex.org/W4416261102","doi":"https://doi.org/10.1109/iccv51701.2025.01035"},"language":"en","primary_location":{"id":"doi:10.1109/iccv51701.2025.01035","is_oa":false,"landing_page_url":"https://doi.org/10.1109/iccv51701.2025.01035","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 IEEE/CVF International Conference on Computer Vision (ICCV)","raw_type":"proceedings-article"},"type":"article","indexed_in":["arxiv","crossref","datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/2506.22179","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5100549891","display_name":"Wenhan Wu","orcid":null},"institutions":[{"id":"https://openalex.org/I102149020","display_name":"University of North Carolina at Charlotte","ror":"https://ror.org/04dawnj30","country_code":"US","type":"education","lineage":["https://openalex.org/I102149020"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Wenhan Wu","raw_affiliation_strings":["University of North Carolina at Charlotte,Department of Computer Science"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"University of North Carolina at Charlotte,Department of Computer Science","institution_ids":["https://openalex.org/I102149020"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5054429972","display_name":"Zhishuai Guo","orcid":"https://orcid.org/0000-0003-2153-2287"},"institutions":[{"id":"https://openalex.org/I102502594","display_name":"Northern Illinois University","ror":"https://ror.org/012wxa772","country_code":"US","type":"education","lineage":["https://openalex.org/I102502594"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Zhishuai Guo","raw_affiliation_strings":["Northern Illinois University,Department of Computer Science"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Northern Illinois University,Department of Computer Science","institution_ids":["https://openalex.org/I102502594"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100418563","display_name":"Chen Chen","orcid":"https://orcid.org/0000-0003-3498-2527"},"institutions":[{"id":"https://openalex.org/I106165777","display_name":"University of Central Florida","ror":"https://ror.org/036nfer12","country_code":"US","type":"education","lineage":["https://openalex.org/I106165777"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Chen Chen","raw_affiliation_strings":["University of Central Florida,Center for Research in Computer Vision"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"University of Central Florida,Center for Research in Computer Vision","institution_ids":["https://openalex.org/I106165777"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5014632942","display_name":"Hongfei Xue","orcid":"https://orcid.org/0000-0001-9691-9668"},"institutions":[{"id":"https://openalex.org/I102149020","display_name":"University of North Carolina at Charlotte","ror":"https://ror.org/04dawnj30","country_code":"US","type":"education","lineage":["https://openalex.org/I102149020"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Hongfei Xue","raw_affiliation_strings":["University of North Carolina at Charlotte,Department of Computer Science"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"University of North Carolina at Charlotte,Department of Computer Science","institution_ids":["https://openalex.org/I102149020"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5015271163","display_name":"Aidong Lu","orcid":"https://orcid.org/0000-0002-7684-4512"},"institutions":[{"id":"https://openalex.org/I102149020","display_name":"University of North Carolina at Charlotte","ror":"https://ror.org/04dawnj30","country_code":"US","type":"education","lineage":["https://openalex.org/I102149020"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Aidong Lu","raw_affiliation_strings":["University of North Carolina at Charlotte,Department of Computer Science"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"University of North Carolina at Charlotte,Department of Computer Science","institution_ids":["https://openalex.org/I102149020"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5100549891"],"corresponding_institution_ids":["https://openalex.org/I102149020"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.32441488,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"11122","last_page":"11131"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10812","display_name":"Human Pose and Action Recognition","score":0.9603000283241272,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10812","display_name":"Human Pose and Action Recognition","score":0.9603000283241272,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11398","display_name":"Hand Gesture Recognition Systems","score":0.005900000222027302,"subfield":{"id":"https://openalex.org/subfields/1709","display_name":"Human-Computer Interaction"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11307","display_name":"Domain Adaptation and Few-Shot Learning","score":0.005400000140070915,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/autoencoder","display_name":"Autoencoder","score":0.7870000004768372},{"id":"https://openalex.org/keywords/semantics","display_name":"Semantics (computer science)","score":0.6100999712944031},{"id":"https://openalex.org/keywords/robustness","display_name":"Robustness (evolution)","score":0.5899999737739563},{"id":"https://openalex.org/keywords/bridging","display_name":"Bridging (networking)","score":0.5407000184059143},{"id":"https://openalex.org/keywords/representation","display_name":"Representation (politics)","score":0.48420000076293945},{"id":"https://openalex.org/keywords/action","display_name":"Action (physics)","score":0.4690000116825104},{"id":"https://openalex.org/keywords/semantic-gap","display_name":"Semantic gap","score":0.4366999864578247}],"concepts":[{"id":"https://openalex.org/C101738243","wikidata":"https://www.wikidata.org/wiki/Q786435","display_name":"Autoencoder","level":3,"score":0.7870000004768372},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.761900007724762},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.7142000198364258},{"id":"https://openalex.org/C184337299","wikidata":"https://www.wikidata.org/wiki/Q1437428","display_name":"Semantics (computer science)","level":2,"score":0.6100999712944031},{"id":"https://openalex.org/C63479239","wikidata":"https://www.wikidata.org/wiki/Q7353546","display_name":"Robustness (evolution)","level":3,"score":0.5899999737739563},{"id":"https://openalex.org/C174348530","wikidata":"https://www.wikidata.org/wiki/Q188635","display_name":"Bridging (networking)","level":2,"score":0.5407000184059143},{"id":"https://openalex.org/C2776359362","wikidata":"https://www.wikidata.org/wiki/Q2145286","display_name":"Representation (politics)","level":3,"score":0.48420000076293945},{"id":"https://openalex.org/C2780791683","wikidata":"https://www.wikidata.org/wiki/Q846785","display_name":"Action (physics)","level":2,"score":0.4690000116825104},{"id":"https://openalex.org/C86034646","wikidata":"https://www.wikidata.org/wiki/Q474311","display_name":"Semantic gap","level":4,"score":0.4366999864578247},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.4275999963283539},{"id":"https://openalex.org/C59404180","wikidata":"https://www.wikidata.org/wiki/Q17013334","display_name":"Feature learning","level":2,"score":0.4059999883174896},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.3684000074863434},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.34860000014305115},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.34630000591278076},{"id":"https://openalex.org/C2987834672","wikidata":"https://www.wikidata.org/wiki/Q4677630","display_name":"Action recognition","level":3,"score":0.3343000113964081},{"id":"https://openalex.org/C36464697","wikidata":"https://www.wikidata.org/wiki/Q451553","display_name":"Visualization","level":2,"score":0.33090001344680786},{"id":"https://openalex.org/C108583219","wikidata":"https://www.wikidata.org/wiki/Q197536","display_name":"Deep learning","level":2,"score":0.2847000062465668},{"id":"https://openalex.org/C2778572836","wikidata":"https://www.wikidata.org/wiki/Q380933","display_name":"Space (punctuation)","level":2,"score":0.2808000147342682}],"mesh":[],"locations_count":3,"locations":[{"id":"doi:10.1109/iccv51701.2025.01035","is_oa":false,"landing_page_url":"https://doi.org/10.1109/iccv51701.2025.01035","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 IEEE/CVF International Conference on Computer Vision (ICCV)","raw_type":"proceedings-article"},{"id":"pmh:oai:arXiv.org:2506.22179","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2506.22179","pdf_url":"https://arxiv.org/pdf/2506.22179","source":{"id":"https://openalex.org/S4393918464","display_name":"ArXiv.org","issn_l":"2331-8422","issn":["2331-8422"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},{"id":"doi:10.48550/arxiv.2506.22179","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2506.22179","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:2506.22179","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2506.22179","pdf_url":"https://arxiv.org/pdf/2506.22179","source":{"id":"https://openalex.org/S4393918464","display_name":"ArXiv.org","issn_l":"2331-8422","issn":["2331-8422"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Zero-shot":[0],"skeleton-based":[1],"action":[2,36,102,107,186,190],"recognition":[3],"aims":[4],"to":[5,64,90,112,146],"develop":[6],"models":[7],"capable":[8],"of":[9,34,76,100,131,170,181],"identifying":[10],"actions":[11],"beyond":[12],"the":[13,32,39,43,66,92,98,122,128,165,168],"categories":[14],"encountered":[15],"during":[16],"training.":[17],"Previous":[18],"approaches":[19],"have":[20],"primarily":[21],"focused":[22],"on":[23,164],"aligning":[24],"visual":[25],"and":[26,49,87,96,117,125,152,156,183],"semantic":[27,40,68,123,176],"representations":[28],"but":[29],"often":[30],"overlooked":[31],"importance":[33],"fine-grained":[35],"patterns":[37],"in":[38,46,133,154],"space":[41],"(e.g.,":[42],"hand":[44],"movements":[45],"drinking":[47],"water":[48],"brushing":[50],"teeth).":[51],"To":[52],"address":[53],"these":[54],"limitations,":[55],"we":[56],"propose":[57],"a":[58,81,105,137],"Frequency-Semantic":[59],"Enhanced":[60],"Variational":[61],"Autoencoder":[62],"(FS-VAE)":[63],"explore":[65],"skeleton":[67,134,155],"representation":[69],"learning":[70,95],"with":[71,85,109],"frequency":[72],"decomposition.":[73],"FS-VAE":[74],"consists":[75],"three":[77],"key":[78],"components:":[79],"1)":[80],"frequency-based":[82],"enhancement":[83],"module":[84],"high-":[86],"low-frequency":[88],"adjustments":[89],"enrich":[91],"skeletal":[93],"semantics":[94],"improve":[97],"robustness":[99],"zero-shot":[101,189],"recognition;":[103],"2)":[104],"semantic-based":[106],"description":[108],"multilevel":[110],"alignment":[111],"capture":[113],"both":[114],"local":[115],"details":[116],"global":[118],"correspondence,":[119],"effectively":[120],"bridging":[121],"gap":[124],"compensating":[126],"for":[127],"inherent":[129],"loss":[130,140],"information":[132],"sequences;":[135],"3)":[136],"calibrated":[138],"cross-alignment":[139],"that":[141,174],"enables":[142],"valid":[143],"skeleton-text":[144],"pairs":[145],"counterbalance":[147],"ambiguous":[148],"ones,":[149],"mitigating":[150],"discrepancies":[151],"ambiguities":[153],"text":[157],"features,":[158],"thereby":[159],"ensuring":[160],"robust":[161,179],"alignment.":[162],"Evaluations":[163],"benchmarks":[166],"demonstrate":[167],"effectiveness":[169],"our":[171],"approach,":[172],"validating":[173],"frequency-enhanced":[175],"features":[177],"enable":[178],"differentiation":[180],"visually":[182],"semantically":[184],"similar":[185],"clusters,":[187],"improving":[188],"recognition.":[191]},"counts_by_year":[],"updated_date":"2026-05-06T06:03:25.996018","created_date":"2025-10-10T00:00:00"}
