{"id":"https://openalex.org/W7133602181","doi":"https://doi.org/10.48550/arxiv.2603.03798","title":"Learning Surgical Robotic Manipulation with 3D Spatial Priors","display_name":"Learning Surgical Robotic Manipulation with 3D Spatial Priors","publication_year":2026,"publication_date":"2026-03-04","ids":{"openalex":"https://openalex.org/W7133602181","doi":"https://doi.org/10.48550/arxiv.2603.03798"},"language":null,"primary_location":{"id":"pmh:doi:10.48550/arxiv.2603.03798","is_oa":true,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S4406922384","display_name":"Open MIND","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":null,"any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5128202515","display_name":"Yu Sheng","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Sheng, Yu","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5128212023","display_name":"Lidian Wang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wang, Lidian","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5128130950","display_name":"Xiaomeng Chu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Chu, Xiaomeng","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5089561793","display_name":"Jiajun Deng","orcid":"https://orcid.org/0000-0001-9624-7451"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Deng, Jiajun","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5128143979","display_name":"Min Cheng","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Cheng, Min","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5128152542","display_name":"Yanyong Zhang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhang, Yanyong","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101732526","display_name":"Bei Hua","orcid":"https://orcid.org/0000-0001-7281-8977"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Hua, Bei","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5128154737","display_name":"Houqiang Li","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Li, Houqiang","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5128218004","display_name":"Jianmin Ji","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Ji, Jianmin","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":9,"corresponding_author_ids":["https://openalex.org/A5128202515"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10868","display_name":"Soft Robotics and Applications","score":0.3862999975681305,"subfield":{"id":"https://openalex.org/subfields/2204","display_name":"Biomedical Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10868","display_name":"Soft Robotics and Applications","score":0.3862999975681305,"subfield":{"id":"https://openalex.org/subfields/2204","display_name":"Biomedical Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10916","display_name":"Surgical Simulation and Training","score":0.26899999380111694,"subfield":{"id":"https://openalex.org/subfields/2746","display_name":"Surgery"},"field":{"id":"https://openalex.org/fields/27","display_name":"Medicine"},"domain":{"id":"https://openalex.org/domains/4","display_name":"Health Sciences"}},{"id":"https://openalex.org/T10531","display_name":"Advanced Vision and Imaging","score":0.11559999734163284,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/robot","display_name":"Robot","score":0.5016999840736389},{"id":"https://openalex.org/keywords/stereopsis","display_name":"Stereopsis","score":0.47620001435279846},{"id":"https://openalex.org/keywords/rendering","display_name":"Rendering (computer graphics)","score":0.3889000117778778},{"id":"https://openalex.org/keywords/robotics","display_name":"Robotics","score":0.35510000586509705},{"id":"https://openalex.org/keywords/spatial-relation","display_name":"Spatial relation","score":0.3488999903202057},{"id":"https://openalex.org/keywords/prior-probability","display_name":"Prior probability","score":0.3352999985218048},{"id":"https://openalex.org/keywords/surgical-planning","display_name":"Surgical planning","score":0.33009999990463257},{"id":"https://openalex.org/keywords/robotic-surgery","display_name":"Robotic surgery","score":0.3264999985694885},{"id":"https://openalex.org/keywords/feature","display_name":"Feature (linguistics)","score":0.31679999828338623}],"concepts":[{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.737500011920929},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.6868000030517578},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6685000061988831},{"id":"https://openalex.org/C90509273","wikidata":"https://www.wikidata.org/wiki/Q11012","display_name":"Robot","level":2,"score":0.5016999840736389},{"id":"https://openalex.org/C68537008","wikidata":"https://www.wikidata.org/wiki/Q247932","display_name":"Stereopsis","level":2,"score":0.47620001435279846},{"id":"https://openalex.org/C205711294","wikidata":"https://www.wikidata.org/wiki/Q176953","display_name":"Rendering (computer graphics)","level":2,"score":0.3889000117778778},{"id":"https://openalex.org/C34413123","wikidata":"https://www.wikidata.org/wiki/Q170978","display_name":"Robotics","level":3,"score":0.35510000586509705},{"id":"https://openalex.org/C27511587","wikidata":"https://www.wikidata.org/wiki/Q2178623","display_name":"Spatial relation","level":2,"score":0.3488999903202057},{"id":"https://openalex.org/C177769412","wikidata":"https://www.wikidata.org/wiki/Q278090","display_name":"Prior probability","level":3,"score":0.3352999985218048},{"id":"https://openalex.org/C2779370443","wikidata":"https://www.wikidata.org/wiki/Q1776627","display_name":"Surgical planning","level":2,"score":0.33009999990463257},{"id":"https://openalex.org/C103203806","wikidata":"https://www.wikidata.org/wiki/Q942348","display_name":"Robotic surgery","level":2,"score":0.3264999985694885},{"id":"https://openalex.org/C2776401178","wikidata":"https://www.wikidata.org/wiki/Q12050496","display_name":"Feature (linguistics)","level":2,"score":0.31679999828338623},{"id":"https://openalex.org/C3017684034","wikidata":"https://www.wikidata.org/wiki/Q942348","display_name":"Surgical robot","level":3,"score":0.31369999051094055},{"id":"https://openalex.org/C179518139","wikidata":"https://www.wikidata.org/wiki/Q5140297","display_name":"Coding (social sciences)","level":2,"score":0.2971000075340271},{"id":"https://openalex.org/C2778181360","wikidata":"https://www.wikidata.org/wiki/Q1074814","display_name":"Surgical instrument","level":2,"score":0.28940001130104065},{"id":"https://openalex.org/C159620131","wikidata":"https://www.wikidata.org/wiki/Q1938983","display_name":"Spatial analysis","level":2,"score":0.2849000096321106},{"id":"https://openalex.org/C523889960","wikidata":"https://www.wikidata.org/wiki/Q160695","display_name":"Torso","level":2,"score":0.2822999954223633},{"id":"https://openalex.org/C35861506","wikidata":"https://www.wikidata.org/wiki/Q17141434","display_name":"Stereo cameras","level":3,"score":0.2782000005245209},{"id":"https://openalex.org/C66322947","wikidata":"https://www.wikidata.org/wiki/Q11658","display_name":"Transformer","level":3,"score":0.27630001306533813},{"id":"https://openalex.org/C2776760102","wikidata":"https://www.wikidata.org/wiki/Q5139990","display_name":"Code (set theory)","level":3,"score":0.2734000086784363},{"id":"https://openalex.org/C89600930","wikidata":"https://www.wikidata.org/wiki/Q1423946","display_name":"Segmentation","level":2,"score":0.2630000114440918},{"id":"https://openalex.org/C64754055","wikidata":"https://www.wikidata.org/wiki/Q7574053","display_name":"Spatial contextual awareness","level":2,"score":0.2581999897956848},{"id":"https://openalex.org/C2777655017","wikidata":"https://www.wikidata.org/wiki/Q1501161","display_name":"Toolbox","level":2,"score":0.2515999972820282},{"id":"https://openalex.org/C3019007443","wikidata":"https://www.wikidata.org/wiki/Q568742","display_name":"3d model","level":2,"score":0.2506999969482422}],"mesh":[],"locations_count":2,"locations":[{"id":"pmh:doi:10.48550/arxiv.2603.03798","is_oa":true,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S4406922384","display_name":"Open MIND","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Article"},{"id":"doi:10.48550/arxiv.2603.03798","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.03798","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"pmh:doi:10.48550/arxiv.2603.03798","is_oa":true,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S4406922384","display_name":"Open MIND","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Achieving":[0],"3D":[1,105,111,134,140,157],"spatial":[2,106,112,179,200],"awareness":[3,107],"is":[4,68],"crucial":[5],"for":[6],"surgical":[7,23,83,102,143,204],"robotic":[8],"manipulation,":[9,27],"where":[10],"precise":[11],"and":[12,56,198,210,224],"delicate":[13],"operations":[14],"are":[15,166],"required.":[16],"Existing":[17],"methods":[18],"either":[19],"explicitly":[20],"reconstruct":[21],"the":[22,38,49,66,80,91,137,171],"scene":[24],"prior":[25],"to":[26,36,53,61,154],"or":[28],"enhance":[29],"multi-view":[30],"features":[31],"by":[32,108],"adding":[33],"wrist-mounted":[34,75],"cameras":[35,76],"supplement":[37],"default":[39],"stereo":[40,128,161],"endoscopes.":[41],"However,":[42],"both":[43],"paradigms":[44],"suffer":[45],"from":[46,160],"notable":[47],"limitations:":[48],"former":[50],"easily":[51],"leads":[52],"error":[54],"accumulation":[55],"prevents":[57],"end-to-end":[58,97],"optimization":[59],"due":[60],"its":[62],"multi-stage":[63],"nature,":[64],"while":[65],"latter":[67],"rarely":[69],"adopted":[70],"in":[71,115,142],"clinical":[72,220],"practice":[73],"since":[74],"can":[77],"interfere":[78],"with":[79,104,132,170],"motion":[81],"of":[82,139],"robot":[84],"arms.":[85],"In":[86],"this":[87],"work,":[88],"we":[89,119,148],"introduce":[90],"Spatial":[92],"Surgical":[93],"Transformer":[94],"(SST),":[95],"an":[96,185],"visuomotor":[98],"policy":[99],"that":[100,193],"empowers":[101],"robots":[103],"directly":[109],"exploring":[110],"cues":[113],"embedded":[114],"endoscopic":[116,129],"images.":[117,163],"First,":[118],"build":[120],"Surgical3D,":[121,147],"a":[122,150,176,215],"large-scale":[123],"photorealistic":[124],"dataset":[125,223],"containing":[126],"30K":[127],"image":[130],"pairs":[131],"accurate":[133],"geometry,":[135],"addressing":[136],"scarcity":[138],"data":[141],"scenes.":[144],"Based":[145],"on":[146,202],"finetune":[149],"powerful":[151],"geometric":[152],"transformer":[153],"extract":[155],"robust":[156],"latent":[158],"representations":[159,165],"endoscopes":[162],"These":[164],"then":[167],"seamlessly":[168],"aligned":[169],"robot's":[172],"action":[173],"space":[174],"via":[175],"lightweight":[177],"multi-level":[178],"feature":[180],"connector":[181],"(MSFC),":[182],"all":[183],"within":[184],"endoscope-centric":[186],"coordinate":[187],"frame.":[188],"Extensive":[189],"real-robot":[190],"experiments":[191],"demonstrate":[192],"SST":[194],"achieves":[195],"state-of-the-art":[196],"performance":[197],"strong":[199],"generalization":[201],"complex":[203],"tasks":[205],"such":[206],"as":[207],"knot":[208],"tying":[209],"ex-vivo":[211],"organ":[212],"dissection,":[213],"representing":[214],"significant":[216],"step":[217],"toward":[218],"practical":[219],"deployment.":[221],"The":[222],"code":[225],"will":[226],"be":[227],"released.":[228]},"counts_by_year":[],"updated_date":"2026-04-04T16:13:02.066488","created_date":"2026-03-06T00:00:00"}
