{"id":"https://openalex.org/W7138282608","doi":"https://doi.org/10.48550/arxiv.2603.14498","title":"R3DP: Real-Time 3D-Aware Policy for Embodied Manipulation","display_name":"R3DP: Real-Time 3D-Aware Policy for Embodied Manipulation","publication_year":2026,"publication_date":"2026-03-15","ids":{"openalex":"https://openalex.org/W7138282608","doi":"https://doi.org/10.48550/arxiv.2603.14498"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2603.14498","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.14498","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2603.14498","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5129691510","display_name":"Yuhao Zhang","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Zhang, Yuhao","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5129701376","display_name":"Wanxi Dong","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Dong, Wanxi","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5129708516","display_name":"Yue Shi","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Shi, Yue","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5129677908","display_name":"Yi Liang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Liang, Yi","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5129703805","display_name":"Jingnan Gao","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Gao, Jingnan","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5129689233","display_name":"Qiaochu Yang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yang, Qiaochu","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5006651724","display_name":"Yaxing Lyu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Lyu, Yaxing","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5129735125","display_name":"Zhixuan Liang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Liang, Zhixuan","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100767941","display_name":"Yibin Liu","orcid":"https://orcid.org/0000-0001-5021-9412"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Liu, Yibin","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5029589907","display_name":"Congsheng Xu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Xu, Congsheng","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5129746253","display_name":"Xianda Guo","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Guo, Xianda","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5129663701","display_name":"Wei Sui","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Sui, Wei","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5129726189","display_name":"Yaohui Jin","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Jin, Yaohui","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5129671707","display_name":"Xiaokang Yang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yang, Xiaokang","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100704962","display_name":"Yanyan Xu","orcid":"https://orcid.org/0000-0001-5429-3177"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Xu, Yanyan","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5129677490","display_name":"Yao Mu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Mu, Yao","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":16,"corresponding_author_ids":["https://openalex.org/A5129691510"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10653","display_name":"Robot Manipulation and Learning","score":0.6675000190734863,"subfield":{"id":"https://openalex.org/subfields/2207","display_name":"Control and Systems Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10653","display_name":"Robot Manipulation and Learning","score":0.6675000190734863,"subfield":{"id":"https://openalex.org/subfields/2207","display_name":"Control and Systems Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10812","display_name":"Human Pose and Action Recognition","score":0.06970000267028809,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.04859999939799309,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/exploit","display_name":"Exploit","score":0.7111999988555908},{"id":"https://openalex.org/keywords/inference","display_name":"Inference","score":0.6154000163078308},{"id":"https://openalex.org/keywords/prior-probability","display_name":"Prior probability","score":0.5910999774932861},{"id":"https://openalex.org/keywords/key","display_name":"Key (lock)","score":0.5192000269889832},{"id":"https://openalex.org/keywords/feature","display_name":"Feature (linguistics)","score":0.512499988079071},{"id":"https://openalex.org/keywords/asynchronous-communication","display_name":"Asynchronous communication","score":0.45329999923706055},{"id":"https://openalex.org/keywords/intrinsics","display_name":"Intrinsics","score":0.3939000070095062}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.795199990272522},{"id":"https://openalex.org/C165696696","wikidata":"https://www.wikidata.org/wiki/Q11287","display_name":"Exploit","level":2,"score":0.7111999988555908},{"id":"https://openalex.org/C2776214188","wikidata":"https://www.wikidata.org/wiki/Q408386","display_name":"Inference","level":2,"score":0.6154000163078308},{"id":"https://openalex.org/C177769412","wikidata":"https://www.wikidata.org/wiki/Q278090","display_name":"Prior probability","level":3,"score":0.5910999774932861},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.574999988079071},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.5192000269889832},{"id":"https://openalex.org/C2776401178","wikidata":"https://www.wikidata.org/wiki/Q12050496","display_name":"Feature (linguistics)","level":2,"score":0.512499988079071},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.47380000352859497},{"id":"https://openalex.org/C151319957","wikidata":"https://www.wikidata.org/wiki/Q752739","display_name":"Asynchronous communication","level":2,"score":0.45329999923706055},{"id":"https://openalex.org/C2908650547","wikidata":"https://www.wikidata.org/wiki/Q20999234","display_name":"Intrinsics","level":2,"score":0.3939000070095062},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.3774000108242035},{"id":"https://openalex.org/C45374587","wikidata":"https://www.wikidata.org/wiki/Q12525525","display_name":"Computation","level":2,"score":0.3382999897003174},{"id":"https://openalex.org/C205711294","wikidata":"https://www.wikidata.org/wiki/Q176953","display_name":"Rendering (computer graphics)","level":2,"score":0.3312999904155731},{"id":"https://openalex.org/C82876162","wikidata":"https://www.wikidata.org/wiki/Q17096504","display_name":"Latency (audio)","level":2,"score":0.31619998812675476},{"id":"https://openalex.org/C36464697","wikidata":"https://www.wikidata.org/wiki/Q451553","display_name":"Visualization","level":2,"score":0.30790001153945923},{"id":"https://openalex.org/C100609095","wikidata":"https://www.wikidata.org/wiki/Q1335050","display_name":"Embodied cognition","level":2,"score":0.27079999446868896},{"id":"https://openalex.org/C70437156","wikidata":"https://www.wikidata.org/wiki/Q7228652","display_name":"Pooling","level":2,"score":0.2703999876976013}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2603.14498","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.14498","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2603.14498","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.14498","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[{"display_name":"Industry, innovation and infrastructure","id":"https://metadata.un.org/sdg/9","score":0.44651496410369873}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Embodied":[0],"manipulation":[1,47],"requires":[2],"accurate":[3],"3D":[4,20,44,68,182,206],"understanding":[5],"of":[6,56],"objects":[7],"and":[8,14,153,190,195],"their":[9,26],"spatial":[10],"relations":[11],"to":[12,104,115,130,184,220],"plan":[13],"execute":[15],"contact-rich":[16],"actions.":[17],"While":[18],"large-scale":[19,67,181],"vision":[21],"models":[22,163],"provide":[23],"strong":[24],"priors,":[25],"computational":[27],"cost":[28],"incurs":[29],"prohibitive":[30],"latency":[31],"for":[32,107,160],"real-time":[33,51,75,80,165],"control.":[34],"We":[35,168],"propose":[36],"Real-time":[37],"3D-aware":[38],"Policy":[39],"(R3DP),":[40],"which":[41,64],"integrates":[42,66],"powerful":[43],"priors":[45,69,183],"into":[46,70,164],"policies":[48],"without":[49,73],"sacrificing":[50],"performance.":[52,76],"A":[53],"core":[54],"innovation":[55],"R3DP":[57,155,170,178,211],"is":[58],"the":[59,71,84],"asynchronous":[60],"fast-slow":[61],"collaboration":[62],"module,":[63],"seamlessly":[65],"policy":[72,209],"compromising":[74],"The":[77],"system":[78,87],"maintains":[79],"efficiency":[81],"by":[82,148,193,203],"querying":[83],"pre-trained":[85],"slow":[86],"(VGGT)":[88],"only":[89],"on":[90],"sparse":[91],"key":[92],"frames,":[93],"while":[94],"simultaneously":[95],"employing":[96],"a":[97,138,157,213,221],"lightweight":[98],"Temporal":[99],"Feature":[100,140],"Prediction":[101],"Network":[102],"(TFPNet)":[103],"predict":[105],"features":[106,145],"all":[108],"intermediate":[109],"frames.":[110],"By":[111],"leveraging":[112],"historical":[113],"data":[114],"exploit":[116],"temporal":[117],"correlations,":[118],"TFPNet":[119],"explicitly":[120,149],"improves":[121],"task":[122],"success":[123,199],"rates":[124],"through":[125],"consistent":[126],"feature":[127],"estimation.":[128],"Additionally,":[129],"enable":[131],"more":[132],"effective":[133],"multi-view":[134,191],"fusion,":[135],"we":[136],"introduce":[137],"Multi-View":[139],"Fuser":[141],"(MVFF)":[142],"that":[143],"aggregates":[144],"across":[146,174],"views":[147],"incorporating":[150],"camera":[151],"intrinsics":[152],"extrinsics.":[154],"offers":[156],"plug-and-play":[158],"solution":[159],"integrating":[161],"large":[162],"inference":[166,217],"systems.":[167],"evaluate":[169],"against":[171],"multiple":[172],"baselines":[173],"different":[175],"visual":[176],"configurations.":[177],"effectively":[179],"harnesses":[180],"achieve":[185],"superior":[186],"results,":[187],"outperforming":[188],"single-view":[189],"DP":[192],"32.9%":[194],"51.4%":[196],"in":[197,216],"average":[198],"rate,":[200],"respectively.":[201],"Furthermore,":[202],"decoupling":[204],"heavy":[205],"reasoning":[207],"from":[208],"execution,":[210],"achieves":[212],"44.8%":[214],"reduction":[215],"time":[218],"compared":[219],"naive":[222],"DP+VGGT":[223],"integration.":[224]},"counts_by_year":[],"updated_date":"2026-03-18T06:31:55.123368","created_date":"2026-03-18T00:00:00"}
