{"id":"https://openalex.org/W7134846473","doi":"https://doi.org/10.48550/arxiv.2603.07691","title":"RoboPCA: Pose-centered Affordance Learning from Human Demonstrations for Robot Manipulation","display_name":"RoboPCA: Pose-centered Affordance Learning from Human Demonstrations for Robot Manipulation","publication_year":2026,"publication_date":"2026-03-08","ids":{"openalex":"https://openalex.org/W7134846473","doi":"https://doi.org/10.48550/arxiv.2603.07691"},"language":null,"primary_location":{"id":"pmh:doi:10.48550/arxiv.2603.07691","is_oa":true,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S4406922384","display_name":"Open MIND","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":null,"any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":null,"display_name":"Xiao, Zhanqi","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Xiao, Zhanqi","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5128633864","display_name":"Ruiping Wang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wang, Ruiping","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5128658759","display_name":"Xilin Chen","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Chen, Xilin","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":3,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10653","display_name":"Robot Manipulation and Learning","score":0.9480999708175659,"subfield":{"id":"https://openalex.org/subfields/2207","display_name":"Control and Systems Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10653","display_name":"Robot Manipulation and Learning","score":0.9480999708175659,"subfield":{"id":"https://openalex.org/subfields/2207","display_name":"Control and Systems Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.00800000037997961,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10709","display_name":"Social Robot Interaction and HRI","score":0.005900000222027302,"subfield":{"id":"https://openalex.org/subfields/3207","display_name":"Social Psychology"},"field":{"id":"https://openalex.org/fields/32","display_name":"Psychology"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/affordance","display_name":"Affordance","score":0.9624999761581421},{"id":"https://openalex.org/keywords/object","display_name":"Object (grammar)","score":0.6568999886512756},{"id":"https://openalex.org/keywords/robot","display_name":"Robot","score":0.6154999732971191},{"id":"https://openalex.org/keywords/focus","display_name":"Focus (optics)","score":0.5568000078201294},{"id":"https://openalex.org/keywords/generalization","display_name":"Generalization","score":0.5511999726295471},{"id":"https://openalex.org/keywords/context","display_name":"Context (archaeology)","score":0.5436000227928162},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.5205000042915344},{"id":"https://openalex.org/keywords/pipeline","display_name":"Pipeline (software)","score":0.43549999594688416},{"id":"https://openalex.org/keywords/interface","display_name":"Interface (matter)","score":0.3752000033855438}],"concepts":[{"id":"https://openalex.org/C194995250","wikidata":"https://www.wikidata.org/wiki/Q531136","display_name":"Affordance","level":2,"score":0.9624999761581421},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7411999702453613},{"id":"https://openalex.org/C2781238097","wikidata":"https://www.wikidata.org/wiki/Q175026","display_name":"Object (grammar)","level":2,"score":0.6568999886512756},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6417999863624573},{"id":"https://openalex.org/C90509273","wikidata":"https://www.wikidata.org/wiki/Q11012","display_name":"Robot","level":2,"score":0.6154999732971191},{"id":"https://openalex.org/C107457646","wikidata":"https://www.wikidata.org/wiki/Q207434","display_name":"Human\u2013computer interaction","level":1,"score":0.5626000165939331},{"id":"https://openalex.org/C192209626","wikidata":"https://www.wikidata.org/wiki/Q190909","display_name":"Focus (optics)","level":2,"score":0.5568000078201294},{"id":"https://openalex.org/C177148314","wikidata":"https://www.wikidata.org/wiki/Q170084","display_name":"Generalization","level":2,"score":0.5511999726295471},{"id":"https://openalex.org/C2779343474","wikidata":"https://www.wikidata.org/wiki/Q3109175","display_name":"Context (archaeology)","level":2,"score":0.5436000227928162},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.5205000042915344},{"id":"https://openalex.org/C43521106","wikidata":"https://www.wikidata.org/wiki/Q2165493","display_name":"Pipeline (software)","level":2,"score":0.43549999594688416},{"id":"https://openalex.org/C113843644","wikidata":"https://www.wikidata.org/wiki/Q901882","display_name":"Interface (matter)","level":4,"score":0.3752000033855438},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.3736000061035156},{"id":"https://openalex.org/C118505674","wikidata":"https://www.wikidata.org/wiki/Q42586063","display_name":"Encoder","level":2,"score":0.3700000047683716},{"id":"https://openalex.org/C34413123","wikidata":"https://www.wikidata.org/wiki/Q170978","display_name":"Robotics","level":3,"score":0.36410000920295715},{"id":"https://openalex.org/C64754055","wikidata":"https://www.wikidata.org/wiki/Q7574053","display_name":"Spatial contextual awareness","level":2,"score":0.349700003862381},{"id":"https://openalex.org/C175154964","wikidata":"https://www.wikidata.org/wiki/Q380077","display_name":"Task analysis","level":3,"score":0.3278000056743622},{"id":"https://openalex.org/C2776401178","wikidata":"https://www.wikidata.org/wiki/Q12050496","display_name":"Feature (linguistics)","level":2,"score":0.31779998540878296},{"id":"https://openalex.org/C145460709","wikidata":"https://www.wikidata.org/wiki/Q859951","display_name":"Human\u2013robot interaction","level":3,"score":0.31349998712539673},{"id":"https://openalex.org/C48044578","wikidata":"https://www.wikidata.org/wiki/Q727490","display_name":"Scalability","level":2,"score":0.31029999256134033},{"id":"https://openalex.org/C104114177","wikidata":"https://www.wikidata.org/wiki/Q79782","display_name":"Motion (physics)","level":2,"score":0.3093999922275543},{"id":"https://openalex.org/C20894473","wikidata":"https://www.wikidata.org/wiki/Q1116105","display_name":"Object model","level":3,"score":0.26910001039505005},{"id":"https://openalex.org/C12725497","wikidata":"https://www.wikidata.org/wiki/Q810247","display_name":"Baseline (sea)","level":2,"score":0.2628999948501587},{"id":"https://openalex.org/C124304363","wikidata":"https://www.wikidata.org/wiki/Q673661","display_name":"Abstraction","level":2,"score":0.2574999928474426},{"id":"https://openalex.org/C12713177","wikidata":"https://www.wikidata.org/wiki/Q1900281","display_name":"Perspective (graphical)","level":2,"score":0.25459998846054077}],"mesh":[],"locations_count":2,"locations":[{"id":"pmh:doi:10.48550/arxiv.2603.07691","is_oa":true,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S4406922384","display_name":"Open MIND","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Article"},{"id":"doi:10.48550/arxiv.2603.07691","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.07691","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"pmh:doi:10.48550/arxiv.2603.07691","is_oa":true,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S4406922384","display_name":"Open MIND","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Article"},"sustainable_development_goals":[{"score":0.5036505460739136,"id":"https://metadata.un.org/sdg/10","display_name":"Reduced inequalities"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Understanding":[0],"spatial":[1,31],"affordances":[2],"--":[3,16],"comprising":[4],"the":[5,12,39,44,124,148,161,166,188],"contact":[6,14,40,62,82,149],"regions":[7,41,63,83,186],"of":[8],"object":[9,135,145,185],"interaction":[10,125,154],"and":[11,25,64,84,111,123,134,151,178,199,202,208],"corresponding":[13],"poses":[15,85],"is":[17],"essential":[18],"for":[19,94],"robots":[20],"to":[21,46,54,58,130,156,165,182],"effectively":[22],"manipulate":[23],"objects":[24],"accomplish":[26],"diverse":[27],"tasks.":[28],"However,":[29],"existing":[30],"affordance":[32,75,96,114,139],"prediction":[33,76],"methods":[34,194],"mainly":[35],"focus":[36],"on":[37,87,195],"locating":[38],"while":[42,137],"delegating":[43],"pose":[45,48],"independent":[47],"estimation":[49],"approaches,":[50],"which":[51],"can":[52],"lead":[53],"task":[55],"failures":[56],"due":[57],"inconsistencies":[59],"between":[60],"predicted":[61],"candidate":[65],"poses.":[66],"In":[67],"this":[68],"work,":[69],"we":[70,98],"propose":[71],"RoboPCA,":[72],"a":[73,101,158],"pose-centered":[74,95,113,138],"framework":[77],"that":[78,105],"jointly":[79],"predicts":[80],"task-appropriate":[81],"conditioned":[86],"instructions.":[88],"To":[89],"enable":[90],"scalable":[91],"data":[92,102],"collection":[93],"learning,":[97],"devise":[99],"Human2Afford,":[100,120],"curation":[103],"pipeline":[104],"automatically":[106],"recovers":[107],"scene-level":[108],"3D":[109,132,162],"information":[110],"infers":[112],"annotations":[115,140],"from":[116,160],"human":[117],"demonstrations.":[118],"With":[119],"scene":[121],"depth":[122],"object's":[126],"mask":[127],"are":[128,141],"extracted":[129],"provide":[131],"context":[133],"localization,":[136],"obtained":[142],"by":[143],"tracking":[144],"points":[146],"within":[147],"region":[150],"analyzing":[152],"hand-object":[153],"patterns":[155],"establish":[157],"mapping":[159],"hand":[163],"mesh":[164],"robot":[167],"end-effector":[168],"orientation.":[169],"By":[170],"integrating":[171],"geometry-appearance":[172],"cues":[173],"through":[174],"an":[175],"RGB-D":[176],"encoder":[177],"incorporating":[179],"mask-enhanced":[180],"features":[181],"emphasize":[183],"task-relevant":[184],"into":[187],"diffusion-based":[189],"framework,":[190],"RoboPCA":[191],"outperforms":[192],"baseline":[193],"image":[196],"datasets,":[197],"simulation,":[198],"real":[200],"robots,":[201],"exhibits":[203],"strong":[204],"generalization":[205],"across":[206],"tasks":[207],"categories.":[209]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2026-03-11T00:00:00"}
