{"id":"https://openalex.org/W4416748533","doi":"https://doi.org/10.1109/iros60139.2025.11246826","title":"3D-AMTA: Occlusion-Aware Real-Time 3D Hand Pose Estimation with Auto Mask and Token-Specific Attention","display_name":"3D-AMTA: Occlusion-Aware Real-Time 3D Hand Pose Estimation with Auto Mask and Token-Specific Attention","publication_year":2025,"publication_date":"2025-10-19","ids":{"openalex":"https://openalex.org/W4416748533","doi":"https://doi.org/10.1109/iros60139.2025.11246826"},"language":null,"primary_location":{"id":"doi:10.1109/iros60139.2025.11246826","is_oa":false,"landing_page_url":"https://doi.org/10.1109/iros60139.2025.11246826","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 IEEE/RSJ International Conference on Intelligent Robots and Systems (IROS)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5101671477","display_name":"Dongfang Zhao","orcid":"https://orcid.org/0000-0002-0677-634X"},"institutions":[{"id":"https://openalex.org/I100625452","display_name":"ON Semiconductor (United States)","ror":"https://ror.org/03nw6pt28","country_code":"US","type":"company","lineage":["https://openalex.org/I100625452"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Dongfang Zhao","raw_affiliation_strings":["Samsung Semiconductor Inc.,SoC CMM Lab,San Diego,California,USA"],"affiliations":[{"raw_affiliation_string":"Samsung Semiconductor Inc.,SoC CMM Lab,San Diego,California,USA","institution_ids":["https://openalex.org/I100625452"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5010384205","display_name":"Menghe Zhang","orcid":null},"institutions":[{"id":"https://openalex.org/I100625452","display_name":"ON Semiconductor (United States)","ror":"https://ror.org/03nw6pt28","country_code":"US","type":"company","lineage":["https://openalex.org/I100625452"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Menghe Zhang","raw_affiliation_strings":["Samsung Semiconductor Inc.,SoC CMM Lab,San Diego,California,USA"],"affiliations":[{"raw_affiliation_string":"Samsung Semiconductor Inc.,SoC CMM Lab,San Diego,California,USA","institution_ids":["https://openalex.org/I100625452"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5039560728","display_name":"Yangwen Liang","orcid":null},"institutions":[{"id":"https://openalex.org/I100625452","display_name":"ON Semiconductor (United States)","ror":"https://ror.org/03nw6pt28","country_code":"US","type":"company","lineage":["https://openalex.org/I100625452"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Yangwen Liang","raw_affiliation_strings":["Samsung Semiconductor Inc.,SoC CMM Lab,San Diego,California,USA"],"affiliations":[{"raw_affiliation_string":"Samsung Semiconductor Inc.,SoC CMM Lab,San Diego,California,USA","institution_ids":["https://openalex.org/I100625452"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5064680456","display_name":"Shuangquan Wang","orcid":"https://orcid.org/0000-0002-3967-9693"},"institutions":[{"id":"https://openalex.org/I100625452","display_name":"ON Semiconductor (United States)","ror":"https://ror.org/03nw6pt28","country_code":"US","type":"company","lineage":["https://openalex.org/I100625452"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Shuangquan Wang","raw_affiliation_strings":["Samsung Semiconductor Inc.,SoC CMM Lab,San Diego,California,USA"],"affiliations":[{"raw_affiliation_string":"Samsung Semiconductor Inc.,SoC CMM Lab,San Diego,California,USA","institution_ids":["https://openalex.org/I100625452"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5113919277","display_name":"Kee-Bong Song","orcid":null},"institutions":[{"id":"https://openalex.org/I100625452","display_name":"ON Semiconductor (United States)","ror":"https://ror.org/03nw6pt28","country_code":"US","type":"company","lineage":["https://openalex.org/I100625452"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Kee-Bong Song","raw_affiliation_strings":["Samsung Semiconductor Inc.,SoC CMM Lab,San Diego,California,USA"],"affiliations":[{"raw_affiliation_string":"Samsung Semiconductor Inc.,SoC CMM Lab,San Diego,California,USA","institution_ids":["https://openalex.org/I100625452"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5100448779","display_name":"Donghoon Kim","orcid":"https://orcid.org/0000-0002-3034-1231"},"institutions":[{"id":"https://openalex.org/I100625452","display_name":"ON Semiconductor (United States)","ror":"https://ror.org/03nw6pt28","country_code":"US","type":"company","lineage":["https://openalex.org/I100625452"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Donghoon Kim","raw_affiliation_strings":["Samsung Semiconductor Inc.,SoC CMM Lab,San Diego,California,USA"],"affiliations":[{"raw_affiliation_string":"Samsung Semiconductor Inc.,SoC CMM Lab,San Diego,California,USA","institution_ids":["https://openalex.org/I100625452"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5101671477"],"corresponding_institution_ids":["https://openalex.org/I100625452"],"apc_list":null,"apc_paid":null,"fwci":1.5534,"has_fulltext":false,"cited_by_count":1,"citation_normalized_percentile":{"value":0.87058578,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":96,"max":98},"biblio":{"volume":null,"issue":null,"first_page":"13590","last_page":"13595"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11398","display_name":"Hand Gesture Recognition Systems","score":0.7167999744415283,"subfield":{"id":"https://openalex.org/subfields/1709","display_name":"Human-Computer Interaction"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11398","display_name":"Hand Gesture Recognition Systems","score":0.7167999744415283,"subfield":{"id":"https://openalex.org/subfields/1709","display_name":"Human-Computer Interaction"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10812","display_name":"Human Pose and Action Recognition","score":0.12729999423027039,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10653","display_name":"Robot Manipulation and Learning","score":0.09080000221729279,"subfield":{"id":"https://openalex.org/subfields/2207","display_name":"Control and Systems Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/pose","display_name":"Pose","score":0.7128999829292297},{"id":"https://openalex.org/keywords/robotics","display_name":"Robotics","score":0.6154999732971191},{"id":"https://openalex.org/keywords/rgb-color-model","display_name":"RGB color model","score":0.5562999844551086},{"id":"https://openalex.org/keywords/software-deployment","display_name":"Software deployment","score":0.5256999731063843},{"id":"https://openalex.org/keywords/inference","display_name":"Inference","score":0.48730000853538513},{"id":"https://openalex.org/keywords/key","display_name":"Key (lock)","score":0.414000004529953},{"id":"https://openalex.org/keywords/robot","display_name":"Robot","score":0.41130000352859497},{"id":"https://openalex.org/keywords/3d-pose-estimation","display_name":"3D pose estimation","score":0.35679998993873596}],"concepts":[{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.8220000267028809},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7717999815940857},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.7202000021934509},{"id":"https://openalex.org/C52102323","wikidata":"https://www.wikidata.org/wiki/Q1671968","display_name":"Pose","level":2,"score":0.7128999829292297},{"id":"https://openalex.org/C34413123","wikidata":"https://www.wikidata.org/wiki/Q170978","display_name":"Robotics","level":3,"score":0.6154999732971191},{"id":"https://openalex.org/C82990744","wikidata":"https://www.wikidata.org/wiki/Q166194","display_name":"RGB color model","level":2,"score":0.5562999844551086},{"id":"https://openalex.org/C105339364","wikidata":"https://www.wikidata.org/wiki/Q2297740","display_name":"Software deployment","level":2,"score":0.5256999731063843},{"id":"https://openalex.org/C2776214188","wikidata":"https://www.wikidata.org/wiki/Q408386","display_name":"Inference","level":2,"score":0.48730000853538513},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.414000004529953},{"id":"https://openalex.org/C90509273","wikidata":"https://www.wikidata.org/wiki/Q11012","display_name":"Robot","level":2,"score":0.41130000352859497},{"id":"https://openalex.org/C36613465","wikidata":"https://www.wikidata.org/wiki/Q4636322","display_name":"3D pose estimation","level":3,"score":0.35679998993873596},{"id":"https://openalex.org/C2988191880","wikidata":"https://www.wikidata.org/wiki/Q40687","display_name":"Robotic hand","level":3,"score":0.3368000090122223},{"id":"https://openalex.org/C65909025","wikidata":"https://www.wikidata.org/wiki/Q1945033","display_name":"Monocular","level":2,"score":0.334199994802475},{"id":"https://openalex.org/C17511633","wikidata":"https://www.wikidata.org/wiki/Q830694","display_name":"SMT placement equipment","level":3,"score":0.33169999718666077},{"id":"https://openalex.org/C108583219","wikidata":"https://www.wikidata.org/wiki/Q197536","display_name":"Deep learning","level":2,"score":0.32030001282691956},{"id":"https://openalex.org/C81363708","wikidata":"https://www.wikidata.org/wiki/Q17084460","display_name":"Convolutional neural network","level":2,"score":0.31439998745918274},{"id":"https://openalex.org/C150415221","wikidata":"https://www.wikidata.org/wiki/Q40687","display_name":"Robotic arm","level":2,"score":0.30559998750686646},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.3052999973297119},{"id":"https://openalex.org/C26760741","wikidata":"https://www.wikidata.org/wiki/Q160402","display_name":"Perception","level":2,"score":0.29580000042915344},{"id":"https://openalex.org/C10161872","wikidata":"https://www.wikidata.org/wiki/Q557891","display_name":"Motion estimation","level":2,"score":0.27889999747276306},{"id":"https://openalex.org/C104114177","wikidata":"https://www.wikidata.org/wiki/Q79782","display_name":"Motion (physics)","level":2,"score":0.26339998841285706},{"id":"https://openalex.org/C108882727","wikidata":"https://www.wikidata.org/wiki/Q2991685","display_name":"Solid modeling","level":2,"score":0.2540000081062317}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/iros60139.2025.11246826","is_oa":false,"landing_page_url":"https://doi.org/10.1109/iros60139.2025.11246826","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 IEEE/RSJ International Conference on Intelligent Robots and Systems (IROS)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":20,"referenced_works":["https://openalex.org/W2057069782","https://openalex.org/W2916798096","https://openalex.org/W2941359057","https://openalex.org/W2973857456","https://openalex.org/W3087784721","https://openalex.org/W3097623574","https://openalex.org/W3202716970","https://openalex.org/W3215845947","https://openalex.org/W4214684804","https://openalex.org/W4298014233","https://openalex.org/W4312383858","https://openalex.org/W4312395531","https://openalex.org/W4385346076","https://openalex.org/W4386065417","https://openalex.org/W4386075921","https://openalex.org/W4390872393","https://openalex.org/W4390873007","https://openalex.org/W4394625766","https://openalex.org/W4413144709","https://openalex.org/W4415799049"],"related_works":[],"abstract_inverted_index":{"Understanding":[0],"hand":[1,32,52,133],"motion":[2],"from":[3],"a":[4,20,80],"single":[5],"RGB":[6],"image":[7],"is":[8],"challenging":[9],"due":[10],"to":[11,131],"occlusions":[12],"and":[13,26,47,74,110,125,138],"high":[14,97],"articulation.":[15],"This":[16],"paper":[17],"presents":[18],"3D-AMTA,":[19],"transformer-based":[21,59],"framework":[22],"with":[23],"Auto":[24],"Mask":[25],"Token-specific":[27],"Attention":[28],"for":[29,44,50,84,102,135],"occlusion-aware":[30],"3D":[31],"pose":[33],"estimation":[34],"(HPE).":[35],"We":[36],"propose":[37,78],"two":[38],"novel":[39],"architectural":[40],"enhancements:":[41],"auto":[42],"mask":[43],"high-occlusion":[45],"scenarios,":[46],"token-specific":[48],"attention":[49],"fine-grained":[51],"articulations.":[53],"These":[54,128],"modules":[55],"seamlessly":[56],"integrate":[57],"into":[58],"architectures":[60],"that":[61,113],"enhance":[62],"real-time":[63],"performance":[64],"in":[65,120],"interactive":[66,136],"systems.":[67],"To":[68],"enable":[69],"efficient":[70],"deployment":[71],"on":[72,91,108],"robotic":[73,104],"embedded":[75],"platforms,":[76],"we":[77],"3D-AMTA-Mobile,":[79],"lightweight":[81],"variant":[82],"optimized":[83],"on-device":[85],"processing.":[86],"It":[87],"achieves":[88],"267":[89],"FPS":[90],"NVIDIA":[92],"RTX":[93],"2080Ti-GPU":[94],"while":[95],"maintaining":[96],"accuracy,":[98,123],"making":[99],"it":[100],"well-suited":[101],"resource-constrained":[103],"applications.":[105],"Extensive":[106],"evaluations":[107],"FreiHAND":[109],"HO3D":[111],"demonstrate":[112],"our":[114],"approach":[115],"consistently":[116],"outperforms":[117],"state-of-the-art":[118],"methods":[119],"terms":[121],"of":[122],"efficiency,":[124],"inference":[126],"speed.":[127],"advancements":[129],"contribute":[130],"robust":[132],"perception":[134],"robotics":[137],"AR-based":[139],"teleoperation.":[140]},"counts_by_year":[{"year":2026,"cited_by_count":1}],"updated_date":"2026-04-15T08:11:43.952461","created_date":"2025-11-28T00:00:00"}
