{"id":"https://openalex.org/W7155350857","doi":"https://doi.org/10.48550/arxiv.2604.19923","title":"UniCon3R: Unified Contact-aware 4D Human-Scene Reconstruction from Monocular Video","display_name":"UniCon3R: Unified Contact-aware 4D Human-Scene Reconstruction from Monocular Video","publication_year":2026,"publication_date":"2026-04-21","ids":{"openalex":"https://openalex.org/W7155350857","doi":"https://doi.org/10.48550/arxiv.2604.19923"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2604.19923","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.19923","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2604.19923","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5029123468","display_name":"Tanuj Sur","orcid":"https://orcid.org/0000-0001-5298-5241"},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Sur, Tanuj","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5134428288","display_name":"Shashank Tripathi","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Tripathi, Shashank","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5134435051","display_name":"Nikos Athanasiou","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Athanasiou, Nikos","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5104343813","display_name":"Ha Linh Nguyen","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Nguyen, Ha Linh","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5134407125","display_name":"Kai Xu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Xu, Kai","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5134430033","display_name":"Michael J. Black","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Black, Michael J.","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5134424815","display_name":"Angela Yao","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yao, Angela","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":7,"corresponding_author_ids":["https://openalex.org/A5029123468"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10812","display_name":"Human Pose and Action Recognition","score":0.8611000180244446,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10812","display_name":"Human Pose and Action Recognition","score":0.8611000180244446,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12290","display_name":"Human Motion and Animation","score":0.06289999932050705,"subfield":{"id":"https://openalex.org/subfields/2207","display_name":"Control and Systems Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10719","display_name":"3D Shape Modeling and Analysis","score":0.03449999913573265,"subfield":{"id":"https://openalex.org/subfields/2206","display_name":"Computational Mechanics"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/monocular","display_name":"Monocular","score":0.6581000089645386},{"id":"https://openalex.org/keywords/motion","display_name":"Motion (physics)","score":0.5981000065803528},{"id":"https://openalex.org/keywords/key","display_name":"Key (lock)","score":0.5730999708175659},{"id":"https://openalex.org/keywords/motion-capture","display_name":"Motion capture","score":0.462799996137619},{"id":"https://openalex.org/keywords/joint","display_name":"Joint (building)","score":0.3815000057220459},{"id":"https://openalex.org/keywords/3d-reconstruction","display_name":"3D reconstruction","score":0.3434999883174896}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7590000033378601},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.7490000128746033},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.7134000062942505},{"id":"https://openalex.org/C65909025","wikidata":"https://www.wikidata.org/wiki/Q1945033","display_name":"Monocular","level":2,"score":0.6581000089645386},{"id":"https://openalex.org/C104114177","wikidata":"https://www.wikidata.org/wiki/Q79782","display_name":"Motion (physics)","level":2,"score":0.5981000065803528},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.5730999708175659},{"id":"https://openalex.org/C48007421","wikidata":"https://www.wikidata.org/wiki/Q676252","display_name":"Motion capture","level":3,"score":0.462799996137619},{"id":"https://openalex.org/C18555067","wikidata":"https://www.wikidata.org/wiki/Q8375051","display_name":"Joint (building)","level":2,"score":0.3815000057220459},{"id":"https://openalex.org/C121684516","wikidata":"https://www.wikidata.org/wiki/Q7600677","display_name":"Computer graphics (images)","level":1,"score":0.3790000081062317},{"id":"https://openalex.org/C109950114","wikidata":"https://www.wikidata.org/wiki/Q4464732","display_name":"3D reconstruction","level":2,"score":0.3434999883174896},{"id":"https://openalex.org/C107457646","wikidata":"https://www.wikidata.org/wiki/Q207434","display_name":"Human\u2013computer interaction","level":1,"score":0.34119999408721924},{"id":"https://openalex.org/C146159030","wikidata":"https://www.wikidata.org/wiki/Q7625099","display_name":"Structure from motion","level":3,"score":0.3264999985694885},{"id":"https://openalex.org/C108882727","wikidata":"https://www.wikidata.org/wiki/Q2991685","display_name":"Solid modeling","level":2,"score":0.3172000050544739},{"id":"https://openalex.org/C2776608160","wikidata":"https://www.wikidata.org/wiki/Q4785462","display_name":"Natural (archaeology)","level":2,"score":0.2856999933719635},{"id":"https://openalex.org/C146849305","wikidata":"https://www.wikidata.org/wiki/Q370766","display_name":"Ground truth","level":2,"score":0.2793999910354614},{"id":"https://openalex.org/C2986578859","wikidata":"https://www.wikidata.org/wiki/Q657632","display_name":"Human motion","level":3,"score":0.2791000008583069},{"id":"https://openalex.org/C52102323","wikidata":"https://www.wikidata.org/wiki/Q1671968","display_name":"Pose","level":2,"score":0.27309998869895935}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2604.19923","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.19923","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2604.19923","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.19923","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[{"display_name":"Sustainable cities and communities","id":"https://metadata.un.org/sdg/11","score":0.47598400712013245}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"We":[0],"introduce":[1],"UniCon3R,":[2],"a":[3,95,153,159],"unified":[4],"feed-forward":[5,16,141],"framework":[6],"for":[7,98,162],"online":[8],"human-scene":[9,17,166],"4D":[10,81,113],"reconstruction":[11,18],"from":[12,21,83],"monocular":[13],"video.":[14],"Current":[15],"methods":[19],"suffer":[20],"artifacts,":[22],"where":[23],"bodies":[24],"float":[25],"above":[26],"the":[27,33,39,46,49,58,61,68,84,92,100,116],"ground":[28],"or":[29],"penetrate":[30],"parts":[31],"of":[32,41],"scene.":[34,117],"A":[35],"key":[36],"reason":[37],"is":[38,53,170],"lack":[40],"effective":[42],"interaction":[43,78],"modelling":[44],"between":[45,57],"human":[47,59,69,85,135],"and":[48,60,87,90,110,133],"environment.":[50],"Our":[51],"goal":[52],"to":[54,65,105],"exploit":[55],"contact":[56,82,93,150],"scene":[62,88,108],"during":[63],"inference":[64,142],"actively":[66],"improve":[67],"mesh":[70],"reconstruction.":[71,167],"To":[72],"that":[73,125],"end,":[74],"we":[75],"explicitly":[76],"model":[77],"by":[79],"inferring":[80],"pose":[86],"geometry":[89,109],"use":[91],"as":[94,152],"corrective":[96],"cue":[97],"generating":[99],"pose.":[101],"This":[102],"enables":[103],"UniCon3R":[104,126],"jointly":[106],"recover":[107],"spatially":[111],"aligned":[112],"humans":[114],"within":[115],"Experiments":[118],"on":[119,130],"standard":[120],"human-centric":[121],"video":[122],"benchmarks":[123],"show":[124],"outperforms":[127],"state-of-the-art":[128],"baselines":[129],"physical":[131],"plausibility":[132],"global":[134],"motion":[136],"estimation":[137],"while":[138],"preserving":[139],"fast,":[140],"speeds.":[143],"The":[144],"results":[145],"validate":[146],"our":[147],"central":[148],"claim:":[149],"serves":[151],"powerful":[154],"internal":[155],"prior,":[156],"thus":[157],"establishing":[158],"new":[160],"paradigm":[161],"physically":[163],"grounded":[164],"joint":[165],"Project":[168],"page":[169],"available":[171],"at":[172],"https://surtantheta.github.io/UniCon3R":[173],".":[174]},"counts_by_year":[],"updated_date":"2026-05-13T06:04:23.736269","created_date":"2026-04-24T00:00:00"}
