{"id":"https://openalex.org/W4408355355","doi":"https://doi.org/10.1109/icassp49660.2025.10888571","title":"Reenvisioning Skeleton-based Action Recognition Through the Lens of NLP","display_name":"Reenvisioning Skeleton-based Action Recognition Through the Lens of NLP","publication_year":2025,"publication_date":"2025-03-12","ids":{"openalex":"https://openalex.org/W4408355355","doi":"https://doi.org/10.1109/icassp49660.2025.10888571"},"language":"en","primary_location":{"id":"doi:10.1109/icassp49660.2025.10888571","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp49660.2025.10888571","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2025 - 2025 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5022604923","display_name":"Long Cao","orcid":"https://orcid.org/0000-0002-8336-4242"},"institutions":[{"id":"https://openalex.org/I150807315","display_name":"Guangxi University","ror":"https://ror.org/02c9qn167","country_code":"CN","type":"education","lineage":["https://openalex.org/I150807315"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Long Cao","raw_affiliation_strings":["Guangxi University,School of Mechanical Engineering,Nanning,China"],"affiliations":[{"raw_affiliation_string":"Guangxi University,School of Mechanical Engineering,Nanning,China","institution_ids":["https://openalex.org/I150807315"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5116493739","display_name":"Shuo Huai","orcid":"https://orcid.org/0009-0004-5269-0688"},"institutions":[{"id":"https://openalex.org/I150807315","display_name":"Guangxi University","ror":"https://ror.org/02c9qn167","country_code":"CN","type":"education","lineage":["https://openalex.org/I150807315"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Shuo Huai","raw_affiliation_strings":["Guangxi University,School of Mechanical Engineering,Nanning,China"],"affiliations":[{"raw_affiliation_string":"Guangxi University,School of Mechanical Engineering,Nanning,China","institution_ids":["https://openalex.org/I150807315"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5086277421","display_name":"Jingyao Gai","orcid":"https://orcid.org/0000-0002-1330-0454"},"institutions":[{"id":"https://openalex.org/I150807315","display_name":"Guangxi University","ror":"https://ror.org/02c9qn167","country_code":"CN","type":"education","lineage":["https://openalex.org/I150807315"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jingyao Gai","raw_affiliation_strings":["Guangxi University,School of Mechanical Engineering,Nanning,China"],"affiliations":[{"raw_affiliation_string":"Guangxi University,School of Mechanical Engineering,Nanning,China","institution_ids":["https://openalex.org/I150807315"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5022604923"],"corresponding_institution_ids":["https://openalex.org/I150807315"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.04374002,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"5"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10812","display_name":"Human Pose and Action Recognition","score":0.9215999841690063,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10812","display_name":"Human Pose and Action Recognition","score":0.9215999841690063,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7365704774856567},{"id":"https://openalex.org/keywords/skeleton","display_name":"Skeleton (computer programming)","score":0.7363196611404419},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.6960672736167908},{"id":"https://openalex.org/keywords/action-recognition","display_name":"Action recognition","score":0.5704684853553772},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.5279447436332703},{"id":"https://openalex.org/keywords/action","display_name":"Action (physics)","score":0.5054061412811279},{"id":"https://openalex.org/keywords/lens","display_name":"Lens (geology)","score":0.4389820098876953},{"id":"https://openalex.org/keywords/computer-vision","display_name":"Computer vision","score":0.39679884910583496},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.3813282549381256},{"id":"https://openalex.org/keywords/engineering","display_name":"Engineering","score":0.09115350246429443},{"id":"https://openalex.org/keywords/physics","display_name":"Physics","score":0.04336676001548767},{"id":"https://openalex.org/keywords/programming-language","display_name":"Programming language","score":0.0403464138507843}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7365704774856567},{"id":"https://openalex.org/C18969341","wikidata":"https://www.wikidata.org/wiki/Q1169129","display_name":"Skeleton (computer programming)","level":2,"score":0.7363196611404419},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6960672736167908},{"id":"https://openalex.org/C2987834672","wikidata":"https://www.wikidata.org/wiki/Q4677630","display_name":"Action recognition","level":3,"score":0.5704684853553772},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.5279447436332703},{"id":"https://openalex.org/C2780791683","wikidata":"https://www.wikidata.org/wiki/Q846785","display_name":"Action (physics)","level":2,"score":0.5054061412811279},{"id":"https://openalex.org/C15336307","wikidata":"https://www.wikidata.org/wiki/Q1766051","display_name":"Lens (geology)","level":2,"score":0.4389820098876953},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.39679884910583496},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.3813282549381256},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.09115350246429443},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.04336676001548767},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0403464138507843},{"id":"https://openalex.org/C78762247","wikidata":"https://www.wikidata.org/wiki/Q1273174","display_name":"Petroleum engineering","level":1,"score":0.0},{"id":"https://openalex.org/C2777212361","wikidata":"https://www.wikidata.org/wiki/Q5127848","display_name":"Class (philosophy)","level":2,"score":0.0},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/icassp49660.2025.10888571","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp49660.2025.10888571","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2025 - 2025 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"},{"id":"https://openalex.org/F4320327522","display_name":"Guangxi University","ror":"https://ror.org/02c9qn167"},{"id":"https://openalex.org/F4320336630","display_name":"Specific Research Project of Guangxi for Research Bases and Talents","ror":null}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":31,"referenced_works":["https://openalex.org/W2048821851","https://openalex.org/W2054041160","https://openalex.org/W2063827895","https://openalex.org/W2122534450","https://openalex.org/W2313903725","https://openalex.org/W2442651457","https://openalex.org/W2606294640","https://openalex.org/W2752796333","https://openalex.org/W2944006115","https://openalex.org/W2948058585","https://openalex.org/W2954146807","https://openalex.org/W2963076818","https://openalex.org/W2964134613","https://openalex.org/W3030163527","https://openalex.org/W3035050855","https://openalex.org/W3096609285","https://openalex.org/W3203634062","https://openalex.org/W3215030504","https://openalex.org/W4205831148","https://openalex.org/W4282981352","https://openalex.org/W4312757522","https://openalex.org/W4318348288","https://openalex.org/W4320015918","https://openalex.org/W4322718191","https://openalex.org/W4385194759","https://openalex.org/W4387854270","https://openalex.org/W4390871852","https://openalex.org/W4391582461","https://openalex.org/W4407690720","https://openalex.org/W6755207826","https://openalex.org/W6790690058"],"related_works":["https://openalex.org/W2953562271","https://openalex.org/W2334655667","https://openalex.org/W2464530384","https://openalex.org/W2105938841","https://openalex.org/W4253358700","https://openalex.org/W3139863978","https://openalex.org/W1576128429","https://openalex.org/W4399611050","https://openalex.org/W2269464716","https://openalex.org/W3170431411"],"abstract_inverted_index":{"Recent":[0],"research":[1],"on":[2,8,82,158],"skeleton-based":[3],"action":[4,135],"recognition":[5,136],"has":[6],"focused":[7],"designing":[9],"network":[10],"architectures":[11],"that":[12,152],"effectively":[13],"capture":[14],"motion":[15],"features.":[16],"In":[17],"this":[18],"work,":[19],"we":[20],"draw":[21],"inspiration":[22],"from":[23],"the":[24,27,96,100,116,129,139],"similarity":[25],"between":[26],"temporal":[28,144],"dynamics":[29],"of":[30,98,106],"action-information-embedded":[31],"skeleton":[32,51,90,93,107,110],"sequences":[33,91],"and":[34,63,71,84,103,146],"natural":[35],"text,":[36],"proposing":[37],"a":[38],"novel":[39],"framework":[40,49],"called":[41],"Action":[42],"Recognition":[43],"via":[44],"Language":[45,60],"Processing":[46,61],"(ARLP).":[47],"This":[48],"treats":[50],"sequence":[52],"data":[53],"as":[54],"analogous":[55],"to":[56,88,132,134,141],"\"sentences\"":[57],"in":[58],"Natural":[59],"(NLP)":[62],"employs":[64],"similar":[65],"processing":[66],"pipelines":[67],"for":[68],"feature":[69],"extraction":[70],"classification.":[72],"The":[73],"Skeleton":[74],"Vector":[75],"Quantized":[76],"Variational":[77],"Autoencoder":[78],"(SVQ-VAE)":[79],"constructed":[80],"based":[81],"GCNs":[83],"VQ-VAE":[85],"are":[86,112],"proposed":[87,117],"transform":[89],"into":[92,115],"tokens,":[94],"addressing":[95],"challenge":[97],"preserving":[99],"spatial":[101],"dimensions":[102],"topological":[104],"information":[105],"data.":[108],"These":[109],"tokens":[111],"then":[113],"fed":[114],"Pose":[118],"Transformer":[119],"(POTR),":[120],"where":[121],"an":[122],"\"action":[123],"queries\"":[124],"mechanism":[125],"is":[126],"introduced":[127],"at":[128],"transformer\u2019s":[130],"decoder":[131],"adapt":[133],"tasks,":[137],"enabling":[138],"model":[140],"autoregressively":[142],"extract":[143],"features":[145],"enhance":[147],"interpretability.":[148],"Experimental":[149],"results":[150],"demonstrate":[151],"ARLP":[153],"significantly":[154],"outperforms":[155],"benchmark":[156],"models":[157],"three":[159],"mainstream":[160],"datasets.":[161]},"counts_by_year":[],"updated_date":"2025-12-28T23:10:05.387466","created_date":"2025-10-10T00:00:00"}
