{"id":"https://openalex.org/W4409186742","doi":"https://doi.org/10.1007/s44443-025-00023-4","title":"MSTFormer: multi-granularity spatial-temporal transformers for 3D human pose estimation","display_name":"MSTFormer: multi-granularity spatial-temporal transformers for 3D human pose estimation","publication_year":2025,"publication_date":"2025-04-04","ids":{"openalex":"https://openalex.org/W4409186742","doi":"https://doi.org/10.1007/s44443-025-00023-4"},"language":"en","primary_location":{"id":"doi:10.1007/s44443-025-00023-4","is_oa":true,"landing_page_url":"https://doi.org/10.1007/s44443-025-00023-4","pdf_url":"https://link.springer.com/content/pdf/10.1007/s44443-025-00023-4.pdf","source":{"id":"https://openalex.org/S2764955546","display_name":"Journal of King Saud University - Computer and Information Sciences","issn_l":"1319-1578","issn":["1319-1578","2213-1248"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310320990","host_organization_name":"Elsevier BV","host_organization_lineage":["https://openalex.org/P4310320990"],"host_organization_lineage_names":["Elsevier BV"],"type":"journal"},"license":"cc-by-nc-nd","license_id":"https://openalex.org/licenses/cc-by-nc-nd","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Journal of King Saud University Computer and Information Sciences","raw_type":"journal-article"},"type":"article","indexed_in":["crossref","doaj"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://link.springer.com/content/pdf/10.1007/s44443-025-00023-4.pdf","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5082246610","display_name":"Hao Lin","orcid":null},"institutions":[{"id":"https://openalex.org/I139024713","display_name":"Guangdong University of Technology","ror":"https://ror.org/04azbjn80","country_code":"CN","type":"education","lineage":["https://openalex.org/I139024713"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Hao Lin","raw_affiliation_strings":["School of Physics and Optoelectronic Engineering, Guangdong University of Technology, Guangdong, 510006, China"],"affiliations":[{"raw_affiliation_string":"School of Physics and Optoelectronic Engineering, Guangdong University of Technology, Guangdong, 510006, China","institution_ids":["https://openalex.org/I139024713"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101655746","display_name":"Sheng Xu","orcid":"https://orcid.org/0000-0002-7742-275X"},"institutions":[{"id":"https://openalex.org/I139024713","display_name":"Guangdong University of Technology","ror":"https://ror.org/04azbjn80","country_code":"CN","type":"education","lineage":["https://openalex.org/I139024713"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Sheng Xu","raw_affiliation_strings":["School of Physics and Optoelectronic Engineering, Guangdong University of Technology, Guangdong, 510006, China"],"affiliations":[{"raw_affiliation_string":"School of Physics and Optoelectronic Engineering, Guangdong University of Technology, Guangdong, 510006, China","institution_ids":["https://openalex.org/I139024713"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5101955349","display_name":"Chengyue Su","orcid":"https://orcid.org/0000-0003-0768-4006"},"institutions":[{"id":"https://openalex.org/I139024713","display_name":"Guangdong University of Technology","ror":"https://ror.org/04azbjn80","country_code":"CN","type":"education","lineage":["https://openalex.org/I139024713"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Chengyue Su","raw_affiliation_strings":["School of Advanced Manufacturing, Guangdong University of Technology, Guangdong, 515200, China","School of Physics and Optoelectronic Engineering, Guangdong University of Technology, Guangdong, 510006, China"],"affiliations":[{"raw_affiliation_string":"School of Advanced Manufacturing, Guangdong University of Technology, Guangdong, 515200, China","institution_ids":["https://openalex.org/I139024713"]},{"raw_affiliation_string":"School of Physics and Optoelectronic Engineering, Guangdong University of Technology, Guangdong, 510006, China","institution_ids":["https://openalex.org/I139024713"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5082246610"],"corresponding_institution_ids":["https://openalex.org/I139024713"],"apc_list":{"value":1350,"currency":"USD","value_usd":1350},"apc_paid":{"value":1350,"currency":"USD","value_usd":1350},"fwci":6.7492,"has_fulltext":true,"cited_by_count":5,"citation_normalized_percentile":{"value":0.96621538,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":91,"max":100},"biblio":{"volume":"37","issue":"3","first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10812","display_name":"Human Pose and Action Recognition","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10812","display_name":"Human Pose and Action Recognition","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12740","display_name":"Gait Recognition and Analysis","score":0.9980000257492065,"subfield":{"id":"https://openalex.org/subfields/2204","display_name":"Biomedical Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11512","display_name":"Anomaly Detection Techniques and Applications","score":0.9977999925613403,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/granularity","display_name":"Granularity","score":0.8670863509178162},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.5750784277915955},{"id":"https://openalex.org/keywords/pose","display_name":"Pose","score":0.5249666571617126},{"id":"https://openalex.org/keywords/estimation","display_name":"Estimation","score":0.5034510493278503},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.4886382222175598},{"id":"https://openalex.org/keywords/transformer","display_name":"Transformer","score":0.4873993992805481},{"id":"https://openalex.org/keywords/computer-vision","display_name":"Computer vision","score":0.4025689661502838},{"id":"https://openalex.org/keywords/geography","display_name":"Geography","score":0.32388073205947876},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.3228686451911926},{"id":"https://openalex.org/keywords/engineering","display_name":"Engineering","score":0.1524333655834198},{"id":"https://openalex.org/keywords/systems-engineering","display_name":"Systems engineering","score":0.12429645657539368},{"id":"https://openalex.org/keywords/electrical-engineering","display_name":"Electrical engineering","score":0.06625017523765564}],"concepts":[{"id":"https://openalex.org/C177774035","wikidata":"https://www.wikidata.org/wiki/Q1246948","display_name":"Granularity","level":2,"score":0.8670863509178162},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.5750784277915955},{"id":"https://openalex.org/C52102323","wikidata":"https://www.wikidata.org/wiki/Q1671968","display_name":"Pose","level":2,"score":0.5249666571617126},{"id":"https://openalex.org/C96250715","wikidata":"https://www.wikidata.org/wiki/Q965330","display_name":"Estimation","level":2,"score":0.5034510493278503},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4886382222175598},{"id":"https://openalex.org/C66322947","wikidata":"https://www.wikidata.org/wiki/Q11658","display_name":"Transformer","level":3,"score":0.4873993992805481},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.4025689661502838},{"id":"https://openalex.org/C205649164","wikidata":"https://www.wikidata.org/wiki/Q1071","display_name":"Geography","level":0,"score":0.32388073205947876},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.3228686451911926},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.1524333655834198},{"id":"https://openalex.org/C201995342","wikidata":"https://www.wikidata.org/wiki/Q682496","display_name":"Systems engineering","level":1,"score":0.12429645657539368},{"id":"https://openalex.org/C119599485","wikidata":"https://www.wikidata.org/wiki/Q43035","display_name":"Electrical engineering","level":1,"score":0.06625017523765564},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0},{"id":"https://openalex.org/C165801399","wikidata":"https://www.wikidata.org/wiki/Q25428","display_name":"Voltage","level":2,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1007/s44443-025-00023-4","is_oa":true,"landing_page_url":"https://doi.org/10.1007/s44443-025-00023-4","pdf_url":"https://link.springer.com/content/pdf/10.1007/s44443-025-00023-4.pdf","source":{"id":"https://openalex.org/S2764955546","display_name":"Journal of King Saud University - Computer and Information Sciences","issn_l":"1319-1578","issn":["1319-1578","2213-1248"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310320990","host_organization_name":"Elsevier BV","host_organization_lineage":["https://openalex.org/P4310320990"],"host_organization_lineage_names":["Elsevier BV"],"type":"journal"},"license":"cc-by-nc-nd","license_id":"https://openalex.org/licenses/cc-by-nc-nd","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Journal of King Saud University Computer and Information Sciences","raw_type":"journal-article"},{"id":"pmh:oai:doaj.org/article:ee939637f8624d5ea904170dbcee95e0","is_oa":true,"landing_page_url":"https://doaj.org/article/ee939637f8624d5ea904170dbcee95e0","pdf_url":null,"source":{"id":"https://openalex.org/S112646816","display_name":"SHILAP Revista de lepidopterolog\u00eda","issn_l":"0300-5267","issn":["0300-5267","2340-4078"],"is_oa":true,"is_in_doaj":true,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Journal of King Saud University: Computer and Information Sciences, Vol 37, Iss 3, Pp 1-19 (2025)","raw_type":"article"}],"best_oa_location":{"id":"doi:10.1007/s44443-025-00023-4","is_oa":true,"landing_page_url":"https://doi.org/10.1007/s44443-025-00023-4","pdf_url":"https://link.springer.com/content/pdf/10.1007/s44443-025-00023-4.pdf","source":{"id":"https://openalex.org/S2764955546","display_name":"Journal of King Saud University - Computer and Information Sciences","issn_l":"1319-1578","issn":["1319-1578","2213-1248"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310320990","host_organization_name":"Elsevier BV","host_organization_lineage":["https://openalex.org/P4310320990"],"host_organization_lineage_names":["Elsevier BV"],"type":"journal"},"license":"cc-by-nc-nd","license_id":"https://openalex.org/licenses/cc-by-nc-nd","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Journal of King Saud University Computer and Information Sciences","raw_type":"journal-article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":true,"grobid_xml":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4409186742.pdf","grobid_xml":"https://content.openalex.org/works/W4409186742.grobid-xml"},"referenced_works_count":51,"referenced_works":["https://openalex.org/W2099333815","https://openalex.org/W2101032778","https://openalex.org/W2331143823","https://openalex.org/W2559085405","https://openalex.org/W2612706635","https://openalex.org/W2769237672","https://openalex.org/W2798646183","https://openalex.org/W2799211965","https://openalex.org/W2895689136","https://openalex.org/W2958088908","https://openalex.org/W2962896489","https://openalex.org/W2963383668","https://openalex.org/W2963781481","https://openalex.org/W2964221239","https://openalex.org/W2972662547","https://openalex.org/W3011743383","https://openalex.org/W3034448411","https://openalex.org/W3045471054","https://openalex.org/W3081460284","https://openalex.org/W3096609285","https://openalex.org/W3098473649","https://openalex.org/W3106838237","https://openalex.org/W3106882556","https://openalex.org/W3107167007","https://openalex.org/W3126541466","https://openalex.org/W3136525061","https://openalex.org/W3138516171","https://openalex.org/W3165924482","https://openalex.org/W3173811519","https://openalex.org/W3187589338","https://openalex.org/W3205327953","https://openalex.org/W3205717647","https://openalex.org/W4205427072","https://openalex.org/W4205502296","https://openalex.org/W4212783702","https://openalex.org/W4214893857","https://openalex.org/W4220911447","https://openalex.org/W4225154541","https://openalex.org/W4225557002","https://openalex.org/W4312966446","https://openalex.org/W4313032421","https://openalex.org/W4313068951","https://openalex.org/W4313506322","https://openalex.org/W4319300161","https://openalex.org/W4379985979","https://openalex.org/W4381946531","https://openalex.org/W4382892987","https://openalex.org/W4383890459","https://openalex.org/W4390873166","https://openalex.org/W6600045627","https://openalex.org/W6600263792"],"related_works":["https://openalex.org/W2931688134","https://openalex.org/W2377919138","https://openalex.org/W2378857091","https://openalex.org/W103652678","https://openalex.org/W4226090359","https://openalex.org/W2059697060","https://openalex.org/W936373746","https://openalex.org/W2975817033","https://openalex.org/W4256502920","https://openalex.org/W4382701072"],"abstract_inverted_index":{"The":[0,101],"2D-to-3D":[1],"lifting":[2],"approach":[3],"based":[4],"on":[5,20,178],"multi-granularity":[6,16,135,165],"methods":[7,17,35,177],"effectively":[8],"captures":[9],"spatial-temporal":[10,70,146],"features":[11,23],"at":[12,30],"various":[13],"scales.":[14],"Existing":[15],"primarily":[18],"focus":[19],"extracting":[21],"joint":[22],"through":[24],"graph-based":[25],"approaches,":[26],"which":[27],"abstract":[28],"information":[29,40,79,111],"different":[31,168],"levels.":[32,169],"However,":[33],"these":[34,58,145],"often":[36],"overlook":[37],"the":[38,77,84,108,113,116,122,129,134,139,164,179],"structured":[39,78,110],"inherent":[41],"in":[42,80],"skeleton":[43,81],"sequences,":[44],"such":[45],"as":[46],"global":[47],"connectivity,":[48],"continuous":[49],"motion":[50],"trajectories,":[51],"and":[52,73,96,162,181],"temporal":[53,130],"context":[54,131],"relationships.":[55],"To":[56],"address":[57],"limitations,":[59],"we":[60],"propose":[61],"a":[62,155],"novel":[63],"method,":[64],"Multi-granularity":[65,85,117,140,157],"Spatial-Temporal":[66,99],"Transformers":[67],"(MSTFormer),":[68],"for":[69],"feature":[71,91],"extraction":[72],"fusion":[74],"that":[75,173],"leverages":[76],"sequences.":[82],"First,":[83],"Spatial":[86,103],"Transformer":[87,104,119,125],"Module":[88,120,143],"constructs":[89],"hierarchical":[90],"representations":[92,166],"of":[93,112,133],"joints,":[94],"bones,":[95],"limbs":[97],"using":[98],"Pooling.":[100],"Multi-level":[102],"Encoder":[105,126],"then":[106],"extracts":[107],"spatial":[109,136],"skeleton.":[114],"Next,":[115],"Temporal":[118,124],"utilizes":[121],"Attention-Enhanced":[123],"to":[127,160],"model":[128],"relationships":[132],"features.":[137],"Finally,":[138],"Feature":[141],"Fusion":[142],"integrates":[144],"features,":[147],"generating":[148],"accurate":[149],"3D":[150],"pose":[151],"representations.":[152],"We":[153],"introduce":[154],"new":[156],"Loss":[158],"Function":[159],"align":[161],"balance":[163],"across":[167],"Experimental":[170],"results":[171],"demonstrate":[172],"MSTFormer":[174],"outperforms":[175],"state-of-the-art":[176],"Human3.6M":[180],"HumanEva-I":[182],"datasets,":[183],"achieving":[184],"superior":[185],"performance":[186],"with":[187],"fewer":[188],"parameters.":[189]},"counts_by_year":[{"year":2026,"cited_by_count":4},{"year":2025,"cited_by_count":1}],"updated_date":"2026-03-18T14:38:29.013473","created_date":"2025-10-10T00:00:00"}
