{"id":"https://openalex.org/W7159621764","doi":"https://doi.org/10.48550/arxiv.2604.28130","title":"MoCapAnything V2: End-to-End Motion Capture for Arbitrary Skeletons","display_name":"MoCapAnything V2: End-to-End Motion Capture for Arbitrary Skeletons","publication_year":2026,"publication_date":"2026-04-30","ids":{"openalex":"https://openalex.org/W7159621764","doi":"https://doi.org/10.48550/arxiv.2604.28130"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2604.28130","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.28130","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2604.28130","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5036660855","display_name":"Kehong Gong","orcid":"https://orcid.org/0000-0002-0935-7044"},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Gong, Kehong","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5109762942","display_name":"Zhengyu Wen","orcid":"https://orcid.org/0009-0005-1274-1484"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wen, Zhengyu","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5134992664","display_name":"Dao Thien Phong","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Phong, Dao Thien","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5134995162","display_name":"Mingxi Xu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Xu, Mingxi","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5114119465","display_name":"Weixia He","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"He, Weixia","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5134941886","display_name":"Qi Wang (22418)","orcid":"https://orcid.org/0009-0001-6327-5402"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wang, Qi","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5134978311","display_name":"Ning Zhang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhang, Ning","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5134937121","display_name":"Zhengyu Li","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Li, Zhengyu","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5134969365","display_name":"Guanli Hou","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Hou, Guanli","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5080414200","display_name":"Dongze Lian","orcid":"https://orcid.org/0000-0002-4947-0316"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Lian, Dongze","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5134945321","display_name":"Xiaoyu He","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"He, Xiaoyu","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5134951605","display_name":"Mingyuan Zhang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhang, Mingyuan","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5134978681","display_name":"Hanwang Zhang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhang, Hanwang","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":13,"corresponding_author_ids":["https://openalex.org/A5036660855"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12290","display_name":"Human Motion and Animation","score":0.6830999851226807,"subfield":{"id":"https://openalex.org/subfields/2207","display_name":"Control and Systems Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T12290","display_name":"Human Motion and Animation","score":0.6830999851226807,"subfield":{"id":"https://openalex.org/subfields/2207","display_name":"Control and Systems Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10812","display_name":"Human Pose and Action Recognition","score":0.22190000116825104,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10653","display_name":"Robot Manipulation and Learning","score":0.03180000185966492,"subfield":{"id":"https://openalex.org/subfields/2207","display_name":"Control and Systems Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/robustness","display_name":"Robustness (evolution)","score":0.5806000232696533},{"id":"https://openalex.org/keywords/motion-capture","display_name":"Motion capture","score":0.5497999787330627},{"id":"https://openalex.org/keywords/rotation","display_name":"Rotation (mathematics)","score":0.5152000188827515},{"id":"https://openalex.org/keywords/kinematics","display_name":"Kinematics","score":0.4668999910354614},{"id":"https://openalex.org/keywords/degrees-of-freedom","display_name":"Degrees of freedom (physics and chemistry)","score":0.43650001287460327},{"id":"https://openalex.org/keywords/inference","display_name":"Inference","score":0.42910000681877136},{"id":"https://openalex.org/keywords/motion","display_name":"Motion (physics)","score":0.4147999882698059},{"id":"https://openalex.org/keywords/animation","display_name":"Animation","score":0.40209999680519104},{"id":"https://openalex.org/keywords/coordinate-system","display_name":"Coordinate system","score":0.37119999527931213}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6693000197410583},{"id":"https://openalex.org/C63479239","wikidata":"https://www.wikidata.org/wiki/Q7353546","display_name":"Robustness (evolution)","level":3,"score":0.5806000232696533},{"id":"https://openalex.org/C48007421","wikidata":"https://www.wikidata.org/wiki/Q676252","display_name":"Motion capture","level":3,"score":0.5497999787330627},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.522599995136261},{"id":"https://openalex.org/C74050887","wikidata":"https://www.wikidata.org/wiki/Q848368","display_name":"Rotation (mathematics)","level":2,"score":0.5152000188827515},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.48730000853538513},{"id":"https://openalex.org/C39920418","wikidata":"https://www.wikidata.org/wiki/Q11476","display_name":"Kinematics","level":2,"score":0.4668999910354614},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.4643000066280365},{"id":"https://openalex.org/C208081375","wikidata":"https://www.wikidata.org/wiki/Q274502","display_name":"Degrees of freedom (physics and chemistry)","level":2,"score":0.43650001287460327},{"id":"https://openalex.org/C2776214188","wikidata":"https://www.wikidata.org/wiki/Q408386","display_name":"Inference","level":2,"score":0.42910000681877136},{"id":"https://openalex.org/C104114177","wikidata":"https://www.wikidata.org/wiki/Q79782","display_name":"Motion (physics)","level":2,"score":0.4147999882698059},{"id":"https://openalex.org/C502989409","wikidata":"https://www.wikidata.org/wiki/Q11425","display_name":"Animation","level":2,"score":0.40209999680519104},{"id":"https://openalex.org/C80551277","wikidata":"https://www.wikidata.org/wiki/Q11210","display_name":"Coordinate system","level":2,"score":0.37119999527931213},{"id":"https://openalex.org/C198082294","wikidata":"https://www.wikidata.org/wiki/Q3399648","display_name":"Position (finance)","level":2,"score":0.36329999566078186},{"id":"https://openalex.org/C18555067","wikidata":"https://www.wikidata.org/wiki/Q8375051","display_name":"Joint (building)","level":2,"score":0.35850000381469727},{"id":"https://openalex.org/C77265313","wikidata":"https://www.wikidata.org/wiki/Q879844","display_name":"Rest (music)","level":2,"score":0.33799999952316284},{"id":"https://openalex.org/C172849965","wikidata":"https://www.wikidata.org/wiki/Q3148875","display_name":"Reference frame","level":3,"score":0.3206000030040741},{"id":"https://openalex.org/C2780522230","wikidata":"https://www.wikidata.org/wiki/Q1140419","display_name":"Ambiguity","level":2,"score":0.299699991941452},{"id":"https://openalex.org/C2777036941","wikidata":"https://www.wikidata.org/wiki/Q6917771","display_name":"Motion analysis","level":2,"score":0.29660001397132874},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.28600001335144043},{"id":"https://openalex.org/C83633838","wikidata":"https://www.wikidata.org/wiki/Q1256564","display_name":"Rotation matrix","level":2,"score":0.2856000065803528},{"id":"https://openalex.org/C13662910","wikidata":"https://www.wikidata.org/wiki/Q193139","display_name":"Trajectory","level":2,"score":0.2840999960899353},{"id":"https://openalex.org/C71559656","wikidata":"https://www.wikidata.org/wiki/Q671298","display_name":"Divide and conquer algorithms","level":2,"score":0.2761000096797943},{"id":"https://openalex.org/C2776036281","wikidata":"https://www.wikidata.org/wiki/Q48769818","display_name":"Constraint (computer-aided design)","level":2,"score":0.2750999927520752},{"id":"https://openalex.org/C2776196297","wikidata":"https://www.wikidata.org/wiki/Q17138781","display_name":"Twist","level":2,"score":0.266400009393692},{"id":"https://openalex.org/C10161872","wikidata":"https://www.wikidata.org/wiki/Q557891","display_name":"Motion estimation","level":2,"score":0.25679999589920044},{"id":"https://openalex.org/C108882727","wikidata":"https://www.wikidata.org/wiki/Q2991685","display_name":"Solid modeling","level":2,"score":0.25099998712539673}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2604.28130","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.28130","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2604.28130","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.28130","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[{"display_name":"Peace, Justice and strong institutions","score":0.717770516872406,"id":"https://metadata.un.org/sdg/16"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Recent":[0],"methods":[1],"for":[2,69,203],"arbitrary-skeleton":[3],"motion":[4],"capture":[5],"from":[6,62,104,135,180,223],"monocular":[7],"video":[8,181],"follow":[9],"a":[10,14,131,164,195],"factorized":[11],"pipeline,":[12],"where":[13],"Video-to-Pose":[15,87],"network":[16],"predicts":[17,176],"joint":[18,27,37,111,177],"positions":[19,38,112,178],"and":[20,44,54,88,92,122,168,190,207,214,229],"an":[21],"analytical":[22],"inverse-kinematics":[23],"(IK)":[24],"stage":[25,58],"recovers":[26],"rotations.":[28],"While":[29],"effective,":[30],"this":[31,75],"design":[32],"is":[33],"inherently":[34],"limited,":[35],"since":[36],"do":[39],"not":[40,145],"fully":[41,81],"determine":[42],"rotations":[43,117],"leave":[45],"degrees":[46,225,232],"of":[47],"freedom":[48],"such":[49],"as":[50],"bone-axis":[51],"twist":[52],"ambiguous,":[53],"the":[55,60,70,79,98,109,136,142,148,153],"non-differentiable":[56],"IK":[57],"prevents":[59],"system":[61,107],"adapting":[63],"to":[64,115,226,230],"noisy":[65],"predictions":[66],"or":[67],"optimizing":[68],"final":[71],"animation":[72],"objective.":[73],"In":[74,172],"work,":[76],"we":[77,129],"present":[78],"first":[80],"end-to-end":[82],"framework":[83],"in":[84,100],"which":[85],"both":[86,188],"Pose-to-Rotation":[89],"are":[90],"learnable":[91],"jointly":[93],"optimized.":[94],"We":[95],"observe":[96],"that":[97,217],"ambiguity":[99],"pose-to-rotation":[101],"mapping":[102,149],"arises":[103],"missing":[105],"coordinate":[106,156],"information:":[108],"same":[110],"can":[113],"correspond":[114],"different":[116,119],"under":[118],"rest":[120,143],"poses":[121],"local":[123,205],"axis":[124],"conventions.":[125],"To":[126],"resolve":[127],"this,":[128],"introduce":[130],"reference":[132],"pose-rotation":[133],"pair":[134],"target":[137],"asset,":[138],"which,":[139],"together":[140],"with":[141],"pose,":[144],"only":[146],"anchors":[147],"but":[150],"also":[151],"defines":[152],"underlying":[154],"rotation":[155,161,221],"system.":[157],"This":[158],"formulation":[159],"turns":[160],"prediction":[162],"into":[163],"well-constrained":[165],"conditional":[166],"problem":[167],"enables":[169],"effective":[170],"learning.":[171],"addition,":[173],"our":[174,218],"model":[175],"directly":[179],"without":[182],"relying":[183],"on":[184,211,233],"mesh":[185],"intermediates,":[186],"improving":[187],"robustness":[189],"efficiency.":[191],"Both":[192],"stages":[193],"share":[194],"skeleton-aware":[196],"Global-Local":[197],"Graph-guided":[198],"Multi-Head":[199],"Attention":[200],"(GL-GMHA)":[201],"module":[202],"joint-level":[204],"reasoning":[206],"global":[208],"coordination.":[209],"Experiments":[210],"Truebones":[212],"Zoo":[213],"Objaverse":[215],"show":[216],"method":[219],"reduces":[220],"error":[222],"~17":[224],"~10":[227],"degrees,":[228],"6.54":[231],"unseen":[234],"skeletons,":[235],"while":[236],"achieving":[237],"~20x":[238],"faster":[239],"inference":[240],"than":[241],"mesh-based":[242],"pipelines.":[243],"Project":[244],"page:":[245],"https://animotionlab.github.io/MoCapAnythingV2/":[246]},"counts_by_year":[],"updated_date":"2026-05-05T08:41:31.759640","created_date":"2026-05-02T00:00:00"}
