{"id":"https://openalex.org/W4387968449","doi":"https://doi.org/10.1145/3581783.3612449","title":"Unified Multi-modal Unsupervised Representation Learning for Skeleton-based Action Understanding","display_name":"Unified Multi-modal Unsupervised Representation Learning for Skeleton-based Action Understanding","publication_year":2023,"publication_date":"2023-10-26","ids":{"openalex":"https://openalex.org/W4387968449","doi":"https://doi.org/10.1145/3581783.3612449"},"language":"en","primary_location":{"id":"doi:10.1145/3581783.3612449","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3581783.3612449","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 31st ACM International Conference on Multimedia","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5103179286","display_name":"Shengkai Sun","orcid":"https://orcid.org/0009-0007-1400-5012"},"institutions":[{"id":"https://openalex.org/I75059550","display_name":"Zhejiang Gongshang University","ror":"https://ror.org/0569mkk41","country_code":"CN","type":"education","lineage":["https://openalex.org/I75059550"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Shengkai Sun","raw_affiliation_strings":["Zhejiang Gongshang University, Hangzhou, China"],"affiliations":[{"raw_affiliation_string":"Zhejiang Gongshang University, Hangzhou, China","institution_ids":["https://openalex.org/I75059550"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5078220957","display_name":"Daizong Liu","orcid":"https://orcid.org/0000-0001-8179-4508"},"institutions":[{"id":"https://openalex.org/I20231570","display_name":"Peking University","ror":"https://ror.org/02v51f717","country_code":"CN","type":"education","lineage":["https://openalex.org/I20231570"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Daizong Liu","raw_affiliation_strings":["Peking University, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Peking University, Beijing, China","institution_ids":["https://openalex.org/I20231570"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5008417257","display_name":"Jianfeng Dong","orcid":"https://orcid.org/0000-0001-5244-3274"},"institutions":[{"id":"https://openalex.org/I75059550","display_name":"Zhejiang Gongshang University","ror":"https://ror.org/0569mkk41","country_code":"CN","type":"education","lineage":["https://openalex.org/I75059550"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jianfeng Dong","raw_affiliation_strings":["Zhejiang Gongshang University &amp; Zhejiang Key Lab of E-Commerce, Hangzhou, China"],"affiliations":[{"raw_affiliation_string":"Zhejiang Gongshang University &amp; Zhejiang Key Lab of E-Commerce, Hangzhou, China","institution_ids":["https://openalex.org/I75059550"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5065871371","display_name":"Xiaoye Qu","orcid":"https://orcid.org/0000-0002-4907-3978"},"institutions":[{"id":"https://openalex.org/I47720641","display_name":"Huazhong University of Science and Technology","ror":"https://ror.org/00p991c53","country_code":"CN","type":"education","lineage":["https://openalex.org/I47720641"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xiaoye Qu","raw_affiliation_strings":["Huazhong University of Science and Technology, Wuhan, China"],"affiliations":[{"raw_affiliation_string":"Huazhong University of Science and Technology, Wuhan, China","institution_ids":["https://openalex.org/I47720641"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5014526931","display_name":"Junyu Gao","orcid":"https://orcid.org/0000-0002-8105-5497"},"institutions":[{"id":"https://openalex.org/I19820366","display_name":"Chinese Academy of Sciences","ror":"https://ror.org/034t30j35","country_code":"CN","type":"government","lineage":["https://openalex.org/I19820366"]},{"id":"https://openalex.org/I4210094879","display_name":"Shandong Institute of Automation","ror":"https://ror.org/00qdtba35","country_code":"CN","type":"facility","lineage":["https://openalex.org/I4210094879","https://openalex.org/I4210142748"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Junyu Gao","raw_affiliation_strings":["Institute of automation, Chinese Academy of Sciences, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Institute of automation, Chinese Academy of Sciences, Beijing, China","institution_ids":["https://openalex.org/I4210094879","https://openalex.org/I19820366"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5034737032","display_name":"Xun Yang","orcid":"https://orcid.org/0000-0003-0201-1638"},"institutions":[{"id":"https://openalex.org/I126520041","display_name":"University of Science and Technology of China","ror":"https://ror.org/04c4dkn09","country_code":"CN","type":"education","lineage":["https://openalex.org/I126520041","https://openalex.org/I19820366"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xun Yang","raw_affiliation_strings":["University of Science and Technology of China, Hefei, China"],"affiliations":[{"raw_affiliation_string":"University of Science and Technology of China, Hefei, China","institution_ids":["https://openalex.org/I126520041"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5106406563","display_name":"Xun Wang","orcid":"https://orcid.org/0000-0001-5566-4689"},"institutions":[{"id":"https://openalex.org/I75059550","display_name":"Zhejiang Gongshang University","ror":"https://ror.org/0569mkk41","country_code":"CN","type":"education","lineage":["https://openalex.org/I75059550"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xun Wang","raw_affiliation_strings":["Zhejiang Gongshang University &amp; Zhejiang Key Lab of E-Commerce, Hangzhou, China"],"affiliations":[{"raw_affiliation_string":"Zhejiang Gongshang University &amp; Zhejiang Key Lab of E-Commerce, Hangzhou, China","institution_ids":["https://openalex.org/I75059550"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5100377147","display_name":"Meng Wang","orcid":"https://orcid.org/0000-0002-3094-7735"},"institutions":[{"id":"https://openalex.org/I16365422","display_name":"Hefei University of Technology","ror":"https://ror.org/02czkny70","country_code":"CN","type":"education","lineage":["https://openalex.org/I16365422"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Meng Wang","raw_affiliation_strings":["Hefei University of Technology, Hefei, China"],"affiliations":[{"raw_affiliation_string":"Hefei University of Technology, Hefei, China","institution_ids":["https://openalex.org/I16365422"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":8,"corresponding_author_ids":["https://openalex.org/A5103179286"],"corresponding_institution_ids":["https://openalex.org/I75059550"],"apc_list":null,"apc_paid":null,"fwci":2.7461,"has_fulltext":false,"cited_by_count":23,"citation_normalized_percentile":{"value":0.92317029,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":98,"max":100},"biblio":{"volume":null,"issue":null,"first_page":"2973","last_page":"2984"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10812","display_name":"Human Pose and Action Recognition","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10812","display_name":"Human Pose and Action Recognition","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9944000244140625,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11512","display_name":"Anomaly Detection Techniques and Applications","score":0.9864000082015991,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7617815732955933},{"id":"https://openalex.org/keywords/modal","display_name":"Modal","score":0.7095679640769958},{"id":"https://openalex.org/keywords/modality","display_name":"Modality (human\u2013computer interaction)","score":0.7066022157669067},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.629676342010498},{"id":"https://openalex.org/keywords/feature-learning","display_name":"Feature learning","score":0.5295335054397583},{"id":"https://openalex.org/keywords/representation","display_name":"Representation (politics)","score":0.5232842564582825},{"id":"https://openalex.org/keywords/semantics","display_name":"Semantics (computer science)","score":0.5019776821136475},{"id":"https://openalex.org/keywords/feature","display_name":"Feature (linguistics)","score":0.4913133680820465},{"id":"https://openalex.org/keywords/unsupervised-learning","display_name":"Unsupervised learning","score":0.4547613561153412},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.4252646565437317},{"id":"https://openalex.org/keywords/consistency","display_name":"Consistency (knowledge bases)","score":0.41816315054893494},{"id":"https://openalex.org/keywords/action","display_name":"Action (physics)","score":0.41035473346710205},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.3585590124130249}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7617815732955933},{"id":"https://openalex.org/C71139939","wikidata":"https://www.wikidata.org/wiki/Q910194","display_name":"Modal","level":2,"score":0.7095679640769958},{"id":"https://openalex.org/C2780226545","wikidata":"https://www.wikidata.org/wiki/Q6888030","display_name":"Modality (human\u2013computer interaction)","level":2,"score":0.7066022157669067},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.629676342010498},{"id":"https://openalex.org/C59404180","wikidata":"https://www.wikidata.org/wiki/Q17013334","display_name":"Feature learning","level":2,"score":0.5295335054397583},{"id":"https://openalex.org/C2776359362","wikidata":"https://www.wikidata.org/wiki/Q2145286","display_name":"Representation (politics)","level":3,"score":0.5232842564582825},{"id":"https://openalex.org/C184337299","wikidata":"https://www.wikidata.org/wiki/Q1437428","display_name":"Semantics (computer science)","level":2,"score":0.5019776821136475},{"id":"https://openalex.org/C2776401178","wikidata":"https://www.wikidata.org/wiki/Q12050496","display_name":"Feature (linguistics)","level":2,"score":0.4913133680820465},{"id":"https://openalex.org/C8038995","wikidata":"https://www.wikidata.org/wiki/Q1152135","display_name":"Unsupervised learning","level":2,"score":0.4547613561153412},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.4252646565437317},{"id":"https://openalex.org/C2776436953","wikidata":"https://www.wikidata.org/wiki/Q5163215","display_name":"Consistency (knowledge bases)","level":2,"score":0.41816315054893494},{"id":"https://openalex.org/C2780791683","wikidata":"https://www.wikidata.org/wiki/Q846785","display_name":"Action (physics)","level":2,"score":0.41035473346710205},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.3585590124130249},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C188027245","wikidata":"https://www.wikidata.org/wiki/Q750446","display_name":"Polymer chemistry","level":1,"score":0.0},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.0},{"id":"https://openalex.org/C94625758","wikidata":"https://www.wikidata.org/wiki/Q7163","display_name":"Politics","level":2,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C17744445","wikidata":"https://www.wikidata.org/wiki/Q36442","display_name":"Political science","level":0,"score":0.0},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C199539241","wikidata":"https://www.wikidata.org/wiki/Q7748","display_name":"Law","level":1,"score":0.0},{"id":"https://openalex.org/C185592680","wikidata":"https://www.wikidata.org/wiki/Q2329","display_name":"Chemistry","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3581783.3612449","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3581783.3612449","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 31st ACM International Conference on Multimedia","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G4579459290","display_name":null,"funder_award_id":"61976188, 62272435, U22A2094","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"},{"id":"https://openalex.org/F4320338468","display_name":"Basic Public Welfare Research Program of Zhejiang Province","ror":null}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":61,"referenced_works":["https://openalex.org/W1983364832","https://openalex.org/W2126579184","https://openalex.org/W2145979309","https://openalex.org/W2187089797","https://openalex.org/W2787919227","https://openalex.org/W2902214002","https://openalex.org/W2944006115","https://openalex.org/W2948058585","https://openalex.org/W2948246283","https://openalex.org/W2951323451","https://openalex.org/W2963076818","https://openalex.org/W2963155035","https://openalex.org/W2964134613","https://openalex.org/W2979930345","https://openalex.org/W2990503944","https://openalex.org/W3034482833","https://openalex.org/W3034548564","https://openalex.org/W3035309251","https://openalex.org/W3035524453","https://openalex.org/W3092424783","https://openalex.org/W3092739351","https://openalex.org/W3092754310","https://openalex.org/W3092803144","https://openalex.org/W3093174808","https://openalex.org/W3103184573","https://openalex.org/W3105195350","https://openalex.org/W3106350522","https://openalex.org/W3106776857","https://openalex.org/W3109142545","https://openalex.org/W3111746241","https://openalex.org/W3152619510","https://openalex.org/W3156509901","https://openalex.org/W3169413442","https://openalex.org/W3170039146","https://openalex.org/W3173415456","https://openalex.org/W3175082063","https://openalex.org/W3203227473","https://openalex.org/W3203514840","https://openalex.org/W3204980132","https://openalex.org/W3205032456","https://openalex.org/W3205106480","https://openalex.org/W3205572000","https://openalex.org/W3205898195","https://openalex.org/W3213331968","https://openalex.org/W4200634815","https://openalex.org/W4205817612","https://openalex.org/W4283026051","https://openalex.org/W4304080717","https://openalex.org/W4312387119","https://openalex.org/W4312675926","https://openalex.org/W4312841534","https://openalex.org/W4313185874","https://openalex.org/W4320008746","https://openalex.org/W4321231508","https://openalex.org/W4322627163","https://openalex.org/W4323663038","https://openalex.org/W4377158291","https://openalex.org/W4382240124","https://openalex.org/W4382240189","https://openalex.org/W4382240256","https://openalex.org/W4382459184"],"related_works":["https://openalex.org/W627697492","https://openalex.org/W3174759195","https://openalex.org/W3167013339","https://openalex.org/W4287121366","https://openalex.org/W4283320496","https://openalex.org/W4390871823","https://openalex.org/W60493759","https://openalex.org/W3213069564","https://openalex.org/W4386437125","https://openalex.org/W4311555960"],"abstract_inverted_index":{"Unsupervised":[0,76],"pre-training":[1],"has":[2],"shown":[3],"great":[4],"success":[5],"in":[6,68,94,211,251],"skeleton-based":[7,252],"action":[8,29,209,253],"understanding":[9,30,210],"recently.":[10],"Existing":[11],"works":[12],"typically":[13],"train":[14],"separate":[15,102],"modality-specific":[16,103],"models":[17],"(i.e.,":[18],"joint,":[19],"bone,":[20],"and":[21,157,177,224],"motion),":[22],"then":[23],"integrate":[24],"the":[25,45,59,91,116,135,164,168,189,234,238],"multi-modal":[26,92,126,137,165,195],"information":[27],"for":[28,106,128,207],"by":[31,147],"a":[32,73,95,148],"late-fusion":[33],"strategy.":[34],"Although":[35],"these":[36,66],"approaches":[37],"have":[38],"achieved":[39],"significant":[40],"performance,":[41],"they":[42],"suffer":[43],"from":[44],"complex":[46],"yet":[47],"redundant":[48],"multi-stream":[49],"model":[50,130],"designs,":[51],"each":[52,172],"of":[53,100,171,192,204],"which":[54,82,198],"is":[55,185,199,230,259],"also":[56],"limited":[57],"to":[58,88,123,161,187,201],"fixed":[60],"input":[61,206],"skeleton":[62,196],"modality.":[63],"To":[64,132],"alleviate":[65],"issues,":[67],"this":[69,181],"paper,":[70],"we":[71,110,152],"propose":[72,154],"Unified":[74],"Multimodal":[75],"Representation":[77],"Learning":[78],"framework,":[79],"called":[80],"UmURL,":[81],"exploits":[83],"an":[84,120],"efficient":[85],"early-fusion":[86,121],"strategy":[87,122],"jointly":[89],"encode":[90],"features":[93,127,138,166],"single-stream":[96],"manner.":[97],"Specifically,":[98],"instead":[99],"designing":[101],"optimization":[104],"processes":[105],"uni-modal":[107,193,239],"unsupervised":[108],"learning,":[109],"feed":[111],"different":[112,202],"modality":[113,142,150,205],"inputs":[114],"into":[115],"same":[117],"stream":[118],"with":[119,237],"learn":[124,188],"their":[125],"reducing":[129],"complexity.":[131],"ensure":[133],"that":[134,163,228],"fused":[136],"do":[139],"not":[140],"exhibit":[141],"bias,":[143],"i.e.,":[144,221],"being":[145],"dominated":[146],"certain":[149],"input,":[151,197],"further":[153],"both":[155],"intra-":[156],"inter-modal":[158],"consistency":[159],"learning":[160],"guarantee":[162],"contain":[167],"complete":[169],"semantics":[170],"modal":[173],"via":[174],"feature":[175],"decomposition":[176],"distinct":[178],"alignment.":[179],"In":[180],"manner,":[182],"our":[183],"framework":[184],"able":[186],"unified":[190],"representations":[191],"or":[194],"flexible":[200],"kinds":[203],"robust":[208],"practical":[212],"cases.":[213],"Extensive":[214],"experiments":[215],"conducted":[216],"on":[217],"three":[218],"large-scale":[219],"datasets,":[220],"NTU-60,":[222],"NTU-120,":[223],"PKU-MMD":[225],"II,":[226],"demonstrate":[227],"UmURL":[229],"highly":[231],"efficient,":[232],"possessing":[233],"approximate":[235],"complexity":[236],"methods,":[240],"while":[241],"achieving":[242],"new":[243],"state-of-the-art":[244],"performance":[245],"across":[246],"various":[247],"downstream":[248],"task":[249],"scenarios":[250],"representation":[254],"learning.":[255],"Our":[256],"source":[257],"code":[258],"available":[260],"at":[261],"https://github.com/HuiGuanLab/UmURL.":[262]},"counts_by_year":[{"year":2026,"cited_by_count":4},{"year":2025,"cited_by_count":13},{"year":2024,"cited_by_count":6}],"updated_date":"2026-04-10T15:06:20.359241","created_date":"2025-10-10T00:00:00"}
