{"id":"https://openalex.org/W4387969359","doi":"https://doi.org/10.1145/3581783.3612560","title":"Skeletal Spatial-Temporal Semantics Guided Homogeneous-Heterogeneous Multimodal Network for Action Recognition","display_name":"Skeletal Spatial-Temporal Semantics Guided Homogeneous-Heterogeneous Multimodal Network for Action Recognition","publication_year":2023,"publication_date":"2023-10-26","ids":{"openalex":"https://openalex.org/W4387969359","doi":"https://doi.org/10.1145/3581783.3612560"},"language":"en","primary_location":{"id":"doi:10.1145/3581783.3612560","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3581783.3612560","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 31st ACM International Conference on Multimedia","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5022973831","display_name":"Chenwei Zhang","orcid":"https://orcid.org/0009-0000-6698-0652"},"institutions":[{"id":"https://openalex.org/I4388482657","display_name":"Shenzhen MSU-BIT University","ror":"https://ror.org/02q963474","country_code":null,"type":"education","lineage":["https://openalex.org/I4388482657"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Chenwei Zhang","raw_affiliation_strings":["Sun Yat-Sen University &amp; Shenzhen MSU-BIT University, Shenzhen, China"],"affiliations":[{"raw_affiliation_string":"Sun Yat-Sen University &amp; Shenzhen MSU-BIT University, Shenzhen, China","institution_ids":["https://openalex.org/I4388482657"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5102941795","display_name":"Yuxuan Hu","orcid":"https://orcid.org/0009-0005-8571-118X"},"institutions":[{"id":"https://openalex.org/I4388482657","display_name":"Shenzhen MSU-BIT University","ror":"https://ror.org/02q963474","country_code":null,"type":"education","lineage":["https://openalex.org/I4388482657"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yuxuan Hu","raw_affiliation_strings":["Sun Yat-Sen University &amp; Shenzhen MSU-BIT University, Shenzhen, China"],"affiliations":[{"raw_affiliation_string":"Sun Yat-Sen University &amp; Shenzhen MSU-BIT University, Shenzhen, China","institution_ids":["https://openalex.org/I4388482657"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5083369457","display_name":"Min Yang","orcid":"https://orcid.org/0000-0001-7345-5071"},"institutions":[{"id":"https://openalex.org/I19820366","display_name":"Chinese Academy of Sciences","ror":"https://ror.org/034t30j35","country_code":"CN","type":"government","lineage":["https://openalex.org/I19820366"]},{"id":"https://openalex.org/I4210145761","display_name":"Shenzhen Institutes of Advanced Technology","ror":"https://ror.org/04gh4er46","country_code":"CN","type":"facility","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210145761"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Min Yang","raw_affiliation_strings":["SIAT, Chinese Academy of Sciences, Shenzhen, China"],"affiliations":[{"raw_affiliation_string":"SIAT, Chinese Academy of Sciences, Shenzhen, China","institution_ids":["https://openalex.org/I4210145761","https://openalex.org/I19820366"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5059967200","display_name":"Chengming Li","orcid":"https://orcid.org/0000-0002-4592-3875"},"institutions":[{"id":"https://openalex.org/I4210152380","display_name":"Shenzhen Technology University","ror":"https://ror.org/04qzpec27","country_code":"CN","type":"education","lineage":["https://openalex.org/I4210152380"]},{"id":"https://openalex.org/I4388482657","display_name":"Shenzhen MSU-BIT University","ror":"https://ror.org/02q963474","country_code":null,"type":"education","lineage":["https://openalex.org/I4388482657"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Chengming Li","raw_affiliation_strings":["Shenzhen MSU-BIT University, Shenzhen, China"],"affiliations":[{"raw_affiliation_string":"Shenzhen MSU-BIT University, Shenzhen, China","institution_ids":["https://openalex.org/I4210152380","https://openalex.org/I4388482657"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5007941489","display_name":"Xiping Hu","orcid":"https://orcid.org/0000-0002-4952-699X"},"institutions":[{"id":"https://openalex.org/I4210152380","display_name":"Shenzhen Technology University","ror":"https://ror.org/04qzpec27","country_code":"CN","type":"education","lineage":["https://openalex.org/I4210152380"]},{"id":"https://openalex.org/I4388482657","display_name":"Shenzhen MSU-BIT University","ror":"https://ror.org/02q963474","country_code":null,"type":"education","lineage":["https://openalex.org/I4388482657"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xiping Hu","raw_affiliation_strings":["Shenzhen MSU-BIT University, Shenzhen, China"],"affiliations":[{"raw_affiliation_string":"Shenzhen MSU-BIT University, Shenzhen, China","institution_ids":["https://openalex.org/I4210152380","https://openalex.org/I4388482657"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5022973831"],"corresponding_institution_ids":["https://openalex.org/I4388482657"],"apc_list":null,"apc_paid":null,"fwci":0.4814,"has_fulltext":false,"cited_by_count":4,"citation_normalized_percentile":{"value":0.658424,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":97},"biblio":{"volume":null,"issue":null,"first_page":"3657","last_page":"3666"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10812","display_name":"Human Pose and Action Recognition","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10812","display_name":"Human Pose and Action Recognition","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11398","display_name":"Hand Gesture Recognition Systems","score":0.9954000115394592,"subfield":{"id":"https://openalex.org/subfields/1709","display_name":"Human-Computer Interaction"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12740","display_name":"Gait Recognition and Analysis","score":0.9912999868392944,"subfield":{"id":"https://openalex.org/subfields/2204","display_name":"Biomedical Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8055344820022583},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.6746894121170044},{"id":"https://openalex.org/keywords/action-recognition","display_name":"Action recognition","score":0.4554117023944855},{"id":"https://openalex.org/keywords/rgb-color-model","display_name":"RGB color model","score":0.4339379072189331},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.4318602681159973},{"id":"https://openalex.org/keywords/computer-vision","display_name":"Computer vision","score":0.3519997000694275}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8055344820022583},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6746894121170044},{"id":"https://openalex.org/C2987834672","wikidata":"https://www.wikidata.org/wiki/Q4677630","display_name":"Action recognition","level":3,"score":0.4554117023944855},{"id":"https://openalex.org/C82990744","wikidata":"https://www.wikidata.org/wiki/Q166194","display_name":"RGB color model","level":2,"score":0.4339379072189331},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.4318602681159973},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.3519997000694275},{"id":"https://openalex.org/C2777212361","wikidata":"https://www.wikidata.org/wiki/Q5127848","display_name":"Class (philosophy)","level":2,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3581783.3612560","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3581783.3612560","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 31st ACM International Conference on Multimedia","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/15","score":0.5799999833106995,"display_name":"Life in Land"}],"awards":[{"id":"https://openalex.org/G1477544716","display_name":null,"funder_award_id":"Guangdong","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G2087396116","display_name":null,"funder_award_id":"China","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G3317480652","display_name":null,"funder_award_id":"Science","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G5994120800","display_name":null,"funder_award_id":"Natural","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G6908683755","display_name":null,"funder_award_id":"62101543","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G917117686","display_name":null,"funder_award_id":"51501190","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"},{"id":"https://openalex.org/F4320321921","display_name":"Natural Science Foundation of Guangdong Province","ror":null}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":36,"referenced_works":["https://openalex.org/W1969595749","https://openalex.org/W2054041160","https://openalex.org/W2183341477","https://openalex.org/W2187089797","https://openalex.org/W2593146028","https://openalex.org/W2607707631","https://openalex.org/W2794805739","https://openalex.org/W2922509574","https://openalex.org/W2944006115","https://openalex.org/W2948058585","https://openalex.org/W2963076818","https://openalex.org/W2963465695","https://openalex.org/W2963524571","https://openalex.org/W2963853051","https://openalex.org/W2963901033","https://openalex.org/W2964134613","https://openalex.org/W2981548405","https://openalex.org/W3034999503","https://openalex.org/W3035050855","https://openalex.org/W3035225512","https://openalex.org/W3035524453","https://openalex.org/W3093411241","https://openalex.org/W3106615203","https://openalex.org/W3155332028","https://openalex.org/W3169413442","https://openalex.org/W3175624454","https://openalex.org/W3203634062","https://openalex.org/W3205898195","https://openalex.org/W3210968912","https://openalex.org/W3213518743","https://openalex.org/W4226079235","https://openalex.org/W4255556797","https://openalex.org/W4282943820","https://openalex.org/W4312757522","https://openalex.org/W4319299930","https://openalex.org/W4321231508"],"related_works":["https://openalex.org/W2058170566","https://openalex.org/W2755342338","https://openalex.org/W2772917594","https://openalex.org/W2166024367","https://openalex.org/W3116076068","https://openalex.org/W2229312674","https://openalex.org/W2951359407","https://openalex.org/W2079911747","https://openalex.org/W1969923398","https://openalex.org/W2775347418"],"abstract_inverted_index":{"Action":[0],"recognition":[1,64,244],"research":[2],"has":[3],"gained":[4],"significant":[5],"attention":[6,183],"with":[7],"two":[8,42,67,111],"dominant":[9],"unimodal":[10],"approaches:":[11],"skeleton-based":[12,62,175,240],"and":[13,74,91,101,105,167,228,231,241],"RGB":[14,157,187],"video-based.":[15],"While":[16],"the":[17,27,38,87,125,138,147,150,170,173,178,186,191,204],"former":[18],"is":[19,130,159],"known":[20],"for":[21,34,61,120,211],"its":[22],"robustness":[23],"in":[24,110,238],"complex":[25],"backgrounds,":[26],"latter":[28],"provides":[29,181],"rich":[30],"environmental":[31],"information":[32],"useful":[33],"context-based":[35],"analysis.":[36],"However,":[37],"fusion":[39,136,151],"of":[40,152],"these":[41],"modalities":[43,142,154],"remains":[44],"an":[45],"open":[46],"challenge.":[47],"In":[48,124,146],"this":[49],"paper,":[50],"we":[51,113],"propose":[52,114],"a":[53,115],"Spatial":[54],"Transformer":[55,72],"&":[56,156,226],"Selective":[57,75],"Temporal":[58,77],"encoder":[59],"(ST&ST)":[60],"action":[63,122,213,243],"by":[65],"constructing":[66],"modules:":[68],"Reranking-Enhanced":[69],"Dynamic":[70],"Mask":[71],"(RE-DMT)":[73],"Kernel":[76],"Convolution":[78],"(SK-TC).":[79],"The":[80,96],"RE-DMT":[81],"captures":[82,98],"global":[83],"spatial":[84],"features,":[85],"while":[86],"dynamic":[88],"mask":[89],"strategy":[90,93],"reranking":[92],"reduce":[94],"redundancy.":[95],"SK-TC":[97],"both":[99,239],"long-term":[100],"short-term":[102],"temporal":[103],"features":[104],"enables":[106,197],"adaptive":[107],"fusion.":[108],"Furthermore,":[109],"phases,":[112],"Homogeneous-Heterogeneous":[116],"Multimodal":[117],"Network":[118],"(HHMNet)":[119],"multi-modal":[121,242],"recognition.":[123,214],"first":[126],"phase,":[127,149],"contrastive":[128,195],"learning":[129,196,208],"employed":[131],"to":[132,185],"achieve":[133],"implicit":[134],"semantic":[135,198],"within":[137],"four":[139],"homogeneous":[140],"skeletal":[141],"(joint,":[143],"bone,":[144],"etc.).":[145],"second":[148],"heterogeneous":[153,201],"(skeleton":[155],"video)":[158],"carried":[160],"out":[161],"at":[162],"three":[163],"levels:":[164],"model,":[165],"feature,":[166],"decision.":[168],"At":[169,190,203],"model":[171,176],"level,":[172,193,206],"powerful":[174],"from":[177],"previous":[179],"phase":[180],"explicit":[182],"guidance":[184],"video-based":[188],"model.":[189],"feature":[192],"multi-part":[194],"distillation":[199],"between":[200],"modalities.":[202],"decision":[205],"ensemble":[207],"combines":[209],"outputs":[210],"final":[212],"We":[215],"evaluate":[216],"our":[217],"proposed":[218],"ST&ST":[219],"guided":[220],"HHMNet":[221],"on":[222],"NTU":[223],"RGB+D":[224],"60":[225],"120":[227],"NW-UCLA":[229],"datasets":[230],"demonstrate":[232],"that":[233],"it":[234],"achieves":[235],"state-of-the-art":[236],"performance":[237],"tasks.":[245]},"counts_by_year":[{"year":2025,"cited_by_count":3},{"year":2024,"cited_by_count":1}],"updated_date":"2026-03-27T05:58:40.876381","created_date":"2025-10-10T00:00:00"}
