{"id":"https://openalex.org/W7123336214","doi":"https://doi.org/10.1109/lra.2026.3653314","title":"PoCoDP3: Pose- and Contact-Aware Visual-Tactile Policy for Contact-Rich 3D Manipulation","display_name":"PoCoDP3: Pose- and Contact-Aware Visual-Tactile Policy for Contact-Rich 3D Manipulation","publication_year":2026,"publication_date":"2026-01-12","ids":{"openalex":"https://openalex.org/W7123336214","doi":"https://doi.org/10.1109/lra.2026.3653314"},"language":null,"primary_location":{"id":"doi:10.1109/lra.2026.3653314","is_oa":false,"landing_page_url":"https://doi.org/10.1109/lra.2026.3653314","pdf_url":null,"source":{"id":"https://openalex.org/S4210169774","display_name":"IEEE Robotics and Automation Letters","issn_l":"2377-3766","issn":["2377-3766"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Robotics and Automation Letters","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5122026392","display_name":"Zhaokun Yue","orcid":null},"institutions":[{"id":"https://openalex.org/I76569877","display_name":"Southeast University","ror":"https://ror.org/04ct4d772","country_code":"CN","type":"education","lineage":["https://openalex.org/I76569877"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Zhaokun Yue","raw_affiliation_strings":["School of Automation, Southeast University, Nanjing, China"],"raw_orcid":"https://orcid.org/0009-0006-4671-473X","affiliations":[{"raw_affiliation_string":"School of Automation, Southeast University, Nanjing, China","institution_ids":["https://openalex.org/I76569877"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101515133","display_name":"Ling Tong","orcid":"https://orcid.org/0000-0003-0203-7838"},"institutions":[{"id":"https://openalex.org/I76569877","display_name":"Southeast University","ror":"https://ror.org/04ct4d772","country_code":"CN","type":"education","lineage":["https://openalex.org/I76569877"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Ling Tong","raw_affiliation_strings":["School of Automation, Southeast University, Nanjing, China"],"raw_orcid":"https://orcid.org/0000-0003-0203-7838","affiliations":[{"raw_affiliation_string":"School of Automation, Southeast University, Nanjing, China","institution_ids":["https://openalex.org/I76569877"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5122861935","display_name":"Kun Qian","orcid":null},"institutions":[{"id":"https://openalex.org/I76569877","display_name":"Southeast University","ror":"https://ror.org/04ct4d772","country_code":"CN","type":"education","lineage":["https://openalex.org/I76569877"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Kun Qian","raw_affiliation_strings":["Southeast University Shenzhen Research Institute, Shenzhen, China"],"raw_orcid":"https://orcid.org/0000-0001-7429-1742","affiliations":[{"raw_affiliation_string":"Southeast University Shenzhen Research Institute, Shenzhen, China","institution_ids":["https://openalex.org/I76569877"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5122026392"],"corresponding_institution_ids":["https://openalex.org/I76569877"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.06588783,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":"11","issue":"3","first_page":"2434","last_page":"2441"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10653","display_name":"Robot Manipulation and Learning","score":0.635200023651123,"subfield":{"id":"https://openalex.org/subfields/2207","display_name":"Control and Systems Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10653","display_name":"Robot Manipulation and Learning","score":0.635200023651123,"subfield":{"id":"https://openalex.org/subfields/2207","display_name":"Control and Systems Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10914","display_name":"Tactile and Sensory Interactions","score":0.15440000593662262,"subfield":{"id":"https://openalex.org/subfields/2805","display_name":"Cognitive Neuroscience"},"field":{"id":"https://openalex.org/fields/28","display_name":"Neuroscience"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},{"id":"https://openalex.org/T10338","display_name":"Advanced Sensor and Energy Harvesting Materials","score":0.04470000043511391,"subfield":{"id":"https://openalex.org/subfields/2204","display_name":"Biomedical Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/inference","display_name":"Inference","score":0.698199987411499},{"id":"https://openalex.org/keywords/object","display_name":"Object (grammar)","score":0.5475000143051147},{"id":"https://openalex.org/keywords/action","display_name":"Action (physics)","score":0.49959999322891235},{"id":"https://openalex.org/keywords/encoder","display_name":"Encoder","score":0.46639999747276306},{"id":"https://openalex.org/keywords/point","display_name":"Point (geometry)","score":0.4207000136375427},{"id":"https://openalex.org/keywords/modalities","display_name":"Modalities","score":0.3921000063419342},{"id":"https://openalex.org/keywords/point-cloud","display_name":"Point cloud","score":0.3756999969482422},{"id":"https://openalex.org/keywords/perspective","display_name":"Perspective (graphical)","score":0.3732999861240387}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7574999928474426},{"id":"https://openalex.org/C2776214188","wikidata":"https://www.wikidata.org/wiki/Q408386","display_name":"Inference","level":2,"score":0.698199987411499},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5852000117301941},{"id":"https://openalex.org/C2781238097","wikidata":"https://www.wikidata.org/wiki/Q175026","display_name":"Object (grammar)","level":2,"score":0.5475000143051147},{"id":"https://openalex.org/C2780791683","wikidata":"https://www.wikidata.org/wiki/Q846785","display_name":"Action (physics)","level":2,"score":0.49959999322891235},{"id":"https://openalex.org/C118505674","wikidata":"https://www.wikidata.org/wiki/Q42586063","display_name":"Encoder","level":2,"score":0.46639999747276306},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.43309998512268066},{"id":"https://openalex.org/C28719098","wikidata":"https://www.wikidata.org/wiki/Q44946","display_name":"Point (geometry)","level":2,"score":0.4207000136375427},{"id":"https://openalex.org/C107457646","wikidata":"https://www.wikidata.org/wiki/Q207434","display_name":"Human\u2013computer interaction","level":1,"score":0.3978999853134155},{"id":"https://openalex.org/C2779903281","wikidata":"https://www.wikidata.org/wiki/Q6888026","display_name":"Modalities","level":2,"score":0.3921000063419342},{"id":"https://openalex.org/C131979681","wikidata":"https://www.wikidata.org/wiki/Q1899648","display_name":"Point cloud","level":2,"score":0.3756999969482422},{"id":"https://openalex.org/C12713177","wikidata":"https://www.wikidata.org/wiki/Q1900281","display_name":"Perspective (graphical)","level":2,"score":0.3732999861240387},{"id":"https://openalex.org/C126388530","wikidata":"https://www.wikidata.org/wiki/Q1131737","display_name":"Imitation","level":2,"score":0.35679998993873596},{"id":"https://openalex.org/C2779436431","wikidata":"https://www.wikidata.org/wiki/Q30672407","display_name":"Policy learning","level":2,"score":0.33970001339912415},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.33799999952316284},{"id":"https://openalex.org/C2987834672","wikidata":"https://www.wikidata.org/wiki/Q4677630","display_name":"Action recognition","level":3,"score":0.31119999289512634},{"id":"https://openalex.org/C26760741","wikidata":"https://www.wikidata.org/wiki/Q160402","display_name":"Perception","level":2,"score":0.30379998683929443},{"id":"https://openalex.org/C175154964","wikidata":"https://www.wikidata.org/wiki/Q380077","display_name":"Task analysis","level":3,"score":0.2939999997615814},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.28999999165534973},{"id":"https://openalex.org/C140779682","wikidata":"https://www.wikidata.org/wiki/Q210868","display_name":"Sampling (signal processing)","level":3,"score":0.28049999475479126},{"id":"https://openalex.org/C99498987","wikidata":"https://www.wikidata.org/wiki/Q2210247","display_name":"Noise (video)","level":3,"score":0.26739999651908875},{"id":"https://openalex.org/C33954974","wikidata":"https://www.wikidata.org/wiki/Q486494","display_name":"Sensor fusion","level":2,"score":0.2671999931335449},{"id":"https://openalex.org/C64876066","wikidata":"https://www.wikidata.org/wiki/Q5141226","display_name":"Cognitive neuroscience of visual object recognition","level":3,"score":0.25440001487731934}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/lra.2026.3653314","is_oa":false,"landing_page_url":"https://doi.org/10.1109/lra.2026.3653314","pdf_url":null,"source":{"id":"https://openalex.org/S4210169774","display_name":"IEEE Robotics and Automation Letters","issn_l":"2377-3766","issn":["2377-3766"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Robotics and Automation Letters","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G2989300276","display_name":null,"funder_award_id":"2025A1515010397","funder_id":"https://openalex.org/F4320337111","funder_display_name":"Basic and Applied Basic Research Foundation of Guangdong Province"}],"funders":[{"id":"https://openalex.org/F4320324856","display_name":"Southeast University","ror":"https://ror.org/04ct4d772"},{"id":"https://openalex.org/F4320337111","display_name":"Basic and Applied Basic Research Foundation of Guangdong Province","ror":null}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":20,"referenced_works":["https://openalex.org/W2580438233","https://openalex.org/W3203302482","https://openalex.org/W3205981435","https://openalex.org/W4206748793","https://openalex.org/W4385403811","https://openalex.org/W4386607259","https://openalex.org/W4386858201","https://openalex.org/W4390776907","https://openalex.org/W4396604919","https://openalex.org/W4402354007","https://openalex.org/W4402354045","https://openalex.org/W4402716100","https://openalex.org/W4404688134","https://openalex.org/W4405265847","https://openalex.org/W4405785353","https://openalex.org/W4405785980","https://openalex.org/W4409326585","https://openalex.org/W4412171122","https://openalex.org/W4414050430","https://openalex.org/W4414050525"],"related_works":[],"abstract_inverted_index":{"Imitation":[0],"learning":[1],"in":[2,37,69,150],"contact-rich":[3,70,94],"tasks":[4,139],"requires":[5],"both":[6,153],"global":[7],"spatial":[8],"awareness":[9],"and":[10,29,54,63,83,137,147,155],"fine-grained":[11],"in-hand":[12,85],"interaction":[13,108],"understanding.":[14],"However,":[15],"vision-only":[16],"policies":[17,149],"based":[18,105],"on":[19,106],"images":[20],"or":[21,41],"point":[22,61],"clouds":[23,62],"are":[24],"often":[25],"susceptible":[26],"to":[27,31,66,123],"occlusion":[28],"struggle":[30],"capture":[32],"critical":[33],"contact":[34,81],"details,":[35],"particularly":[36],"visually":[38],"ambiguous":[39],"regions":[40],"during":[42],"subtle":[43],"tactile":[44,64,76,90],"interactions.":[45],"In":[46],"this":[47],"work,":[48],"we":[49],"present":[50],"PoCoDP3,":[51],"a":[52,74,115],"pose-":[53],"contact-aware":[55],"visual-tactile":[56,112],"policy":[57,118],"that":[58,78,141],"integrates":[59],"3D":[60,148],"inputs":[65],"generate":[67],"actions":[68],"tasks.":[71],"PoCoDP3":[72,142],"introduces":[73],"dual-branch":[75],"encoder":[77],"jointly":[79],"models":[80],"dynamics":[82],"estimates":[84],"object":[86],"pose,":[87],"enabling":[88,110],"structured":[89],"representations":[91],"for":[92],"precise":[93],"manipulation.":[95],"A":[96],"contact-driven":[97],"cross-modal":[98],"fusion":[99],"mechanism":[100],"adaptively":[101],"prioritizes":[102],"sensory":[103],"modalities":[104],"real-time":[107],"cues,":[109],"efficient":[111],"integration.":[113],"Moreover,":[114],"reference-guided":[116],"diffusion":[117],"leverages":[119],"reference":[120],"action":[121,132],"offsets":[122],"reduce":[124],"sampling":[125],"steps,":[126],"significantly":[127],"accelerating":[128],"inference":[129,156],"while":[130],"maintaining":[131],"quality.":[133],"Experiments":[134],"across":[135],"simulation":[136],"real-world":[138],"demonstrate":[140],"consistently":[143],"outperforms":[144],"representative":[145],"2D":[146],"terms":[151],"of":[152],"accuracy":[154],"efficiency.":[157]},"counts_by_year":[],"updated_date":"2026-04-29T09:16:38.111599","created_date":"2026-01-14T00:00:00"}
