{"id":"https://openalex.org/W7154322995","doi":"https://doi.org/10.48550/arxiv.2604.10677","title":"LIDEA: Human-to-Robot Imitation Learning via Implicit Feature Distillation and Explicit Geometry Alignment","display_name":"LIDEA: Human-to-Robot Imitation Learning via Implicit Feature Distillation and Explicit Geometry Alignment","publication_year":2026,"publication_date":"2026-04-12","ids":{"openalex":"https://openalex.org/W7154322995","doi":"https://doi.org/10.48550/arxiv.2604.10677"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2604.10677","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.10677","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2604.10677","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5130787349","display_name":"Yifu Xu","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Xu, Yifu","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5133623522","display_name":"Bokai Lin","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Lin, Bokai","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5133568048","display_name":"Xinyu Zhan","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhan, Xinyu","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5133577696","display_name":"Hongjie Fang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Fang, Hongjie","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5133554190","display_name":"Yong-Lu Li","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Li, Yong-Lu","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5133602550","display_name":"Cewu Lu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Lu, Cewu","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5133623062","display_name":"Lixin Yang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yang, Lixin","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":7,"corresponding_author_ids":["https://openalex.org/A5130787349"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10653","display_name":"Robot Manipulation and Learning","score":0.2502000033855438,"subfield":{"id":"https://openalex.org/subfields/2207","display_name":"Control and Systems Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10653","display_name":"Robot Manipulation and Learning","score":0.2502000033855438,"subfield":{"id":"https://openalex.org/subfields/2207","display_name":"Control and Systems Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10709","display_name":"Social Robot Interaction and HRI","score":0.2151000052690506,"subfield":{"id":"https://openalex.org/subfields/3207","display_name":"Social Psychology"},"field":{"id":"https://openalex.org/fields/32","display_name":"Psychology"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.12860000133514404,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/robot","display_name":"Robot","score":0.5891000032424927},{"id":"https://openalex.org/keywords/feature","display_name":"Feature (linguistics)","score":0.4934000074863434},{"id":"https://openalex.org/keywords/bridging","display_name":"Bridging (networking)","score":0.46149998903274536},{"id":"https://openalex.org/keywords/transitive-relation","display_name":"Transitive relation","score":0.4462999999523163},{"id":"https://openalex.org/keywords/human\u2013robot-interaction","display_name":"Human\u2013robot interaction","score":0.4392000138759613},{"id":"https://openalex.org/keywords/imitation","display_name":"Imitation","score":0.4221999943256378},{"id":"https://openalex.org/keywords/human-visual-system-model","display_name":"Human visual system model","score":0.3889999985694885}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7157999873161316},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6722999811172485},{"id":"https://openalex.org/C90509273","wikidata":"https://www.wikidata.org/wiki/Q11012","display_name":"Robot","level":2,"score":0.5891000032424927},{"id":"https://openalex.org/C2776401178","wikidata":"https://www.wikidata.org/wiki/Q12050496","display_name":"Feature (linguistics)","level":2,"score":0.4934000074863434},{"id":"https://openalex.org/C174348530","wikidata":"https://www.wikidata.org/wiki/Q188635","display_name":"Bridging (networking)","level":2,"score":0.46149998903274536},{"id":"https://openalex.org/C191399111","wikidata":"https://www.wikidata.org/wiki/Q64861","display_name":"Transitive relation","level":2,"score":0.4462999999523163},{"id":"https://openalex.org/C145460709","wikidata":"https://www.wikidata.org/wiki/Q859951","display_name":"Human\u2013robot interaction","level":3,"score":0.4392000138759613},{"id":"https://openalex.org/C126388530","wikidata":"https://www.wikidata.org/wiki/Q1131737","display_name":"Imitation","level":2,"score":0.4221999943256378},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.4138999879360199},{"id":"https://openalex.org/C160086991","wikidata":"https://www.wikidata.org/wiki/Q5939193","display_name":"Human visual system model","level":3,"score":0.3889999985694885},{"id":"https://openalex.org/C36464697","wikidata":"https://www.wikidata.org/wiki/Q451553","display_name":"Visualization","level":2,"score":0.3555999994277954},{"id":"https://openalex.org/C2779321571","wikidata":"https://www.wikidata.org/wiki/Q7936605","display_name":"Visual learning","level":2,"score":0.34779998660087585},{"id":"https://openalex.org/C43521106","wikidata":"https://www.wikidata.org/wiki/Q2165493","display_name":"Pipeline (software)","level":2,"score":0.3474000096321106},{"id":"https://openalex.org/C107457646","wikidata":"https://www.wikidata.org/wiki/Q207434","display_name":"Human\u2013computer interaction","level":1,"score":0.34279999136924744},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.3343999981880188},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.30160000920295715},{"id":"https://openalex.org/C99844830","wikidata":"https://www.wikidata.org/wiki/Q102441924","display_name":"Scaling","level":2,"score":0.2994999885559082},{"id":"https://openalex.org/C108882727","wikidata":"https://www.wikidata.org/wiki/Q2991685","display_name":"Solid modeling","level":2,"score":0.29010000824928284},{"id":"https://openalex.org/C2983761899","wikidata":"https://www.wikidata.org/wiki/Q604674","display_name":"Robot vision","level":4,"score":0.2754000127315521},{"id":"https://openalex.org/C2780735816","wikidata":"https://www.wikidata.org/wiki/Q28324931","display_name":"Incremental learning","level":2,"score":0.2750000059604645}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2604.10677","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.10677","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2604.10677","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.10677","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Scaling":[0],"up":[1,153],"robot":[2,32,105,158],"learning":[3,79,84],"is":[4],"hindered":[5],"by":[6],"the":[7,25,90,113,161],"scarcity":[8],"of":[9,20,156],"robotic":[10],"demonstrations,":[11,159],"whereas":[12],"human":[13,29,87,103,150,168],"videos":[14,169],"offer":[15],"a":[16,35,96,108],"vast,":[17],"untapped":[18],"source":[19],"interaction":[21,128],"data.":[22],"However,":[23],"bridging":[24],"embodiment":[26,126],"gap":[27],"between":[28],"hands":[30],"and":[31,60,73,104,144,160],"arms":[33],"remains":[34],"critical":[36],"challenge.":[37],"Existing":[38],"cross-embodiment":[39],"transfer":[40],"strategies":[41],"typically":[42],"rely":[43],"on":[44],"visual":[45,51,58,92],"editing,":[46],"but":[47],"they":[48],"often":[49],"introduce":[50,68],"artifacts":[52],"due":[53],"to":[54,154],"intrinsic":[55],"discrepancies":[56],"in":[57,81,107],"appearance":[59],"3D":[61,114],"geometry.":[62],"To":[63],"address":[64],"these":[65],"limitations,":[66],"we":[67,117],"LIDEA":[69,94,138],"(Implicit":[70],"Feature":[71],"Distillation":[72],"Explicit":[74],"Geometric":[75],"Alignment),":[76],"an":[77,119],"imitation":[78],"framework":[80,162],"which":[82],"policy":[83],"benefits":[85],"from":[86,127,139,167],"demonstrations.":[88],"In":[89,112],"2D":[91],"domain,":[93,116],"employs":[95],"dual-stage":[97],"transitive":[98],"distillation":[99],"pipeline":[100],"that":[101,123,149],"aligns":[102],"representations":[106],"shared":[109],"latent":[110],"space.":[111],"geometric":[115],"propose":[118],"embodiment-agnostic":[120],"alignment":[121],"strategy":[122],"explicitly":[124],"decouples":[125],"geometry,":[129],"ensuring":[130],"consistent":[131],"3D-aware":[132],"perception.":[133],"Extensive":[134],"experiments":[135],"empirically":[136],"validate":[137],"two":[140],"perspectives:":[141],"data":[142,151],"efficiency":[143],"OOD":[145],"robustness.":[146],"Results":[147],"show":[148],"substitutes":[152],"80%":[155],"costly":[157],"successfully":[163],"transfers":[164],"unseen":[165],"patterns":[166],"for":[170],"out-of-distribution":[171],"generalization.":[172]},"counts_by_year":[],"updated_date":"2026-04-15T06:04:33.058270","created_date":"2026-04-15T00:00:00"}
