{"id":"https://openalex.org/W7154456015","doi":"https://doi.org/10.48550/arxiv.2604.12929","title":"Grasp in Gaussians: Fast Monocular Reconstruction of Dynamic Hand-Object Interactions","display_name":"Grasp in Gaussians: Fast Monocular Reconstruction of Dynamic Hand-Object Interactions","publication_year":2026,"publication_date":"2026-04-14","ids":{"openalex":"https://openalex.org/W7154456015","doi":"https://doi.org/10.48550/arxiv.2604.12929"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2604.12929","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.12929","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2604.12929","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5119619924","display_name":"Ayce Idil Aytekin","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Aytekin, Ayce Idil","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5133700452","display_name":"Xu Chen","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Chen, Xu","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5102848878","display_name":"Zhengyang Shen","orcid":"https://orcid.org/0000-0003-3442-6344"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Shen, Zhengyang","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5013075810","display_name":"Thabo Beeler","orcid":"https://orcid.org/0000-0002-8077-1205"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Beeler, Thabo","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5133664596","display_name":"Helge Rhodin","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Rhodin, Helge","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5089712643","display_name":"Rishabh Dabral","orcid":"https://orcid.org/0009-0004-1245-4146"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Dabral, Rishabh","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5020664641","display_name":"Christian Theobalt","orcid":"https://orcid.org/0000-0001-6104-6625"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Theobalt, Christian","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":7,"corresponding_author_ids":["https://openalex.org/A5119619924"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10812","display_name":"Human Pose and Action Recognition","score":0.6908000111579895,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10812","display_name":"Human Pose and Action Recognition","score":0.6908000111579895,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10653","display_name":"Robot Manipulation and Learning","score":0.15410000085830688,"subfield":{"id":"https://openalex.org/subfields/2207","display_name":"Control and Systems Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11398","display_name":"Hand Gesture Recognition Systems","score":0.08380000293254852,"subfield":{"id":"https://openalex.org/subfields/1709","display_name":"Human-Computer Interaction"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/grasp","display_name":"GRASP","score":0.7146000266075134},{"id":"https://openalex.org/keywords/monocular","display_name":"Monocular","score":0.6330999732017517},{"id":"https://openalex.org/keywords/object","display_name":"Object (grammar)","score":0.5580000281333923},{"id":"https://openalex.org/keywords/tracking","display_name":"Tracking (education)","score":0.5476999878883362},{"id":"https://openalex.org/keywords/representation","display_name":"Representation (politics)","score":0.5302000045776367},{"id":"https://openalex.org/keywords/position","display_name":"Position (finance)","score":0.508400022983551},{"id":"https://openalex.org/keywords/motion","display_name":"Motion (physics)","score":0.4440999925136566},{"id":"https://openalex.org/keywords/3d-reconstruction","display_name":"3D reconstruction","score":0.4032000005245209},{"id":"https://openalex.org/keywords/video-tracking","display_name":"Video tracking","score":0.3849000036716461}],"concepts":[{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.8008999824523926},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.7922999858856201},{"id":"https://openalex.org/C171268870","wikidata":"https://www.wikidata.org/wiki/Q1486676","display_name":"GRASP","level":2,"score":0.7146000266075134},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6905999779701233},{"id":"https://openalex.org/C65909025","wikidata":"https://www.wikidata.org/wiki/Q1945033","display_name":"Monocular","level":2,"score":0.6330999732017517},{"id":"https://openalex.org/C2781238097","wikidata":"https://www.wikidata.org/wiki/Q175026","display_name":"Object (grammar)","level":2,"score":0.5580000281333923},{"id":"https://openalex.org/C2775936607","wikidata":"https://www.wikidata.org/wiki/Q466845","display_name":"Tracking (education)","level":2,"score":0.5476999878883362},{"id":"https://openalex.org/C2776359362","wikidata":"https://www.wikidata.org/wiki/Q2145286","display_name":"Representation (politics)","level":3,"score":0.5302000045776367},{"id":"https://openalex.org/C198082294","wikidata":"https://www.wikidata.org/wiki/Q3399648","display_name":"Position (finance)","level":2,"score":0.508400022983551},{"id":"https://openalex.org/C104114177","wikidata":"https://www.wikidata.org/wiki/Q79782","display_name":"Motion (physics)","level":2,"score":0.4440999925136566},{"id":"https://openalex.org/C109950114","wikidata":"https://www.wikidata.org/wiki/Q4464732","display_name":"3D reconstruction","level":2,"score":0.4032000005245209},{"id":"https://openalex.org/C202474056","wikidata":"https://www.wikidata.org/wiki/Q1931635","display_name":"Video tracking","level":3,"score":0.3849000036716461},{"id":"https://openalex.org/C167966045","wikidata":"https://www.wikidata.org/wiki/Q5532625","display_name":"Generative model","level":3,"score":0.37070000171661377},{"id":"https://openalex.org/C163716315","wikidata":"https://www.wikidata.org/wiki/Q901177","display_name":"Gaussian","level":2,"score":0.3691999912261963},{"id":"https://openalex.org/C52102323","wikidata":"https://www.wikidata.org/wiki/Q1671968","display_name":"Pose","level":2,"score":0.3147999942302704},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.310699999332428},{"id":"https://openalex.org/C141379421","wikidata":"https://www.wikidata.org/wiki/Q6094427","display_name":"Iterative reconstruction","level":2,"score":0.30149999260902405},{"id":"https://openalex.org/C83248878","wikidata":"https://www.wikidata.org/wiki/Q344000","display_name":"Active appearance model","level":3,"score":0.3009999990463257},{"id":"https://openalex.org/C13662910","wikidata":"https://www.wikidata.org/wiki/Q193139","display_name":"Trajectory","level":2,"score":0.29510000348091125},{"id":"https://openalex.org/C95020103","wikidata":"https://www.wikidata.org/wiki/Q1813492","display_name":"Match moving","level":3,"score":0.28360000252723694},{"id":"https://openalex.org/C146159030","wikidata":"https://www.wikidata.org/wiki/Q7625099","display_name":"Structure from motion","level":3,"score":0.2669000029563904},{"id":"https://openalex.org/C64876066","wikidata":"https://www.wikidata.org/wiki/Q5141226","display_name":"Cognitive neuroscience of visual object recognition","level":3,"score":0.26460000872612},{"id":"https://openalex.org/C2777708103","wikidata":"https://www.wikidata.org/wiki/Q852589","display_name":"Motion blur","level":3,"score":0.26440000534057617},{"id":"https://openalex.org/C2780586882","wikidata":"https://www.wikidata.org/wiki/Q7520643","display_name":"Simple (philosophy)","level":2,"score":0.2540000081062317}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2604.12929","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.12929","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2604.12929","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.12929","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"We":[0,78],"present":[1],"Grasp":[2],"in":[3],"Gaussians":[4],"(GraG),":[5],"a":[6,18,62,85,97,119,147],"fast":[7,108],"and":[8,37,53,72,82,107,139,184],"robust":[9],"method":[10,31],"for":[11],"reconstructing":[12],"dynamic":[13],"3D":[14,149],"hand-object":[15,56,168],"interactions":[16,169],"from":[17,43,68,123],"single":[19],"monocular":[20,125],"video.":[21],"Unlike":[22],"recent":[23],"approaches":[24],"that":[25,51,163],"optimize":[26],"heavy":[27],"neural":[28],"representations,":[29],"our":[30],"focuses":[32],"on":[33,159,170],"tracking":[34,70,109],"the":[35,38,91,115],"hand":[36,126,131,150],"object":[39,80,180],"efficiently,":[40],"once":[41],"initialized":[42],"pretrained":[44],"large":[45],"models.":[46],"Our":[47],"key":[48],"insight":[49],"is":[50],"accurate":[52],"temporally":[54,166],"stable":[55,155],"motion":[57,132],"can":[58],"be":[59],"recovered":[60],"using":[61,84,133],"compact":[63,103],"Sum-of-Gaussians":[64],"(SoG)":[65],"representation,":[66],"revived":[67],"classical":[69],"literature":[71],"integrated":[73],"with":[74],"generative":[75],"Gaussian-based":[76],"initializations.":[77],"initialize":[79],"pose":[81,127],"geometry":[83],"video-adapted":[86],"SAM3D":[87],"pipeline,":[88],"then":[89],"convert":[90],"resulting":[92],"dense":[93],"Gaussian":[94],"representation":[95,104],"into":[96],"lightweight":[98],"SoG":[99],"via":[100],"subsampling.":[101],"This":[102],"enables":[105],"efficient":[106],"while":[110,153,178],"preserving":[111],"geometric":[112],"fidelity.":[113],"For":[114],"hand,":[116],"we":[117,129],"adopt":[118],"complementary":[120],"strategy:":[121],"starting":[122],"off-the-shelf":[124],"initialization,":[128],"refine":[130],"simple":[134],"yet":[135],"effective":[136],"2D":[137],"joint":[138],"depth":[140],"alignment":[141],"losses,":[142],"avoiding":[143],"per-frame":[144],"refinement":[145],"of":[146],"detailed":[148],"appearance":[151],"model":[152],"maintaining":[154],"articulation.":[156],"Extensive":[157],"experiments":[158],"public":[160],"benchmarks":[161],"demonstrate":[162],"GraG":[164],"reconstructs":[165],"coherent":[167],"long":[171],"sequences":[172],"6.4x":[173],"faster":[174],"than":[175],"prior":[176],"work":[177],"improving":[179],"reconstruction":[181],"by":[182,190],"13.4%":[183],"reducing":[185],"hand's":[186],"per-joint":[187],"position":[188],"error":[189],"over":[191],"65%.":[192]},"counts_by_year":[],"updated_date":"2026-05-04T08:30:34.212998","created_date":"2026-04-16T00:00:00"}
