{"id":"https://openalex.org/W7162082156","doi":"https://doi.org/10.48550/arxiv.2605.22123","title":"Beyond Pixels: Learning Invariant Rewards for Real-World Robotics From a Few Demonstrations","display_name":"Beyond Pixels: Learning Invariant Rewards for Real-World Robotics From a Few Demonstrations","publication_year":2026,"publication_date":"2026-05-21","ids":{"openalex":"https://openalex.org/W7162082156","doi":"https://doi.org/10.48550/arxiv.2605.22123"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2605.22123","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2605.22123","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2605.22123","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5109786566","display_name":"Tengye Xu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Xu, Tengye","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5136735954","display_name":"Yangting Sun","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Sun, Yangting","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5136744481","display_name":"Ziju Shen","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Shen, Ziju","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5058818348","display_name":"Guanqi Chen","orcid":"https://orcid.org/0000-0002-1440-3340"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Chen, Guanqi","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100460018","display_name":"Zhen Fu","orcid":"https://orcid.org/0009-0009-4831-2437"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Fu, Zhen","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5136757890","display_name":"Chen yizhou","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"yizhou, Chen","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5136781978","display_name":"Hua Chen","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Chen, Hua","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5136753332","display_name":"Jia Pan","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Pan, Jia","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":8,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.6912000179290771,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.6912000179290771,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10653","display_name":"Robot Manipulation and Learning","score":0.19200000166893005,"subfield":{"id":"https://openalex.org/subfields/2207","display_name":"Control and Systems Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.024399999529123306,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.6733999848365784},{"id":"https://openalex.org/keywords/memorization","display_name":"Memorization","score":0.635699987411499},{"id":"https://openalex.org/keywords/invariant","display_name":"Invariant (physics)","score":0.6255999803543091},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.5277000069618225},{"id":"https://openalex.org/keywords/robotics","display_name":"Robotics","score":0.4878999888896942},{"id":"https://openalex.org/keywords/representation","display_name":"Representation (politics)","score":0.4553000032901764},{"id":"https://openalex.org/keywords/process","display_name":"Process (computing)","score":0.41909998655319214},{"id":"https://openalex.org/keywords/ranking","display_name":"Ranking (information retrieval)","score":0.3659000098705292}],"concepts":[{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6837999820709229},{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.6733999848365784},{"id":"https://openalex.org/C30038468","wikidata":"https://www.wikidata.org/wiki/Q4354775","display_name":"Memorization","level":2,"score":0.635699987411499},{"id":"https://openalex.org/C190470478","wikidata":"https://www.wikidata.org/wiki/Q2370229","display_name":"Invariant (physics)","level":2,"score":0.6255999803543091},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.621999979019165},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.5277000069618225},{"id":"https://openalex.org/C34413123","wikidata":"https://www.wikidata.org/wiki/Q170978","display_name":"Robotics","level":3,"score":0.4878999888896942},{"id":"https://openalex.org/C2776359362","wikidata":"https://www.wikidata.org/wiki/Q2145286","display_name":"Representation (politics)","level":3,"score":0.4553000032901764},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.4546000063419342},{"id":"https://openalex.org/C98045186","wikidata":"https://www.wikidata.org/wiki/Q205663","display_name":"Process (computing)","level":2,"score":0.41909998655319214},{"id":"https://openalex.org/C189430467","wikidata":"https://www.wikidata.org/wiki/Q7293293","display_name":"Ranking (information retrieval)","level":2,"score":0.3659000098705292},{"id":"https://openalex.org/C2781238097","wikidata":"https://www.wikidata.org/wiki/Q175026","display_name":"Object (grammar)","level":2,"score":0.3634999990463257},{"id":"https://openalex.org/C175154964","wikidata":"https://www.wikidata.org/wiki/Q380077","display_name":"Task analysis","level":3,"score":0.3158999979496002},{"id":"https://openalex.org/C64876066","wikidata":"https://www.wikidata.org/wiki/Q5141226","display_name":"Cognitive neuroscience of visual object recognition","level":3,"score":0.3077000081539154},{"id":"https://openalex.org/C36464697","wikidata":"https://www.wikidata.org/wiki/Q451553","display_name":"Visualization","level":2,"score":0.30559998750686646},{"id":"https://openalex.org/C107457646","wikidata":"https://www.wikidata.org/wiki/Q207434","display_name":"Human\u2013computer interaction","level":1,"score":0.29280000925064087},{"id":"https://openalex.org/C87698059","wikidata":"https://www.wikidata.org/wiki/Q1808960","display_name":"LTI system theory","level":3,"score":0.27549999952316284},{"id":"https://openalex.org/C59404180","wikidata":"https://www.wikidata.org/wiki/Q17013334","display_name":"Feature learning","level":2,"score":0.27399998903274536},{"id":"https://openalex.org/C196340769","wikidata":"https://www.wikidata.org/wiki/Q7698910","display_name":"Temporal difference learning","level":3,"score":0.27230000495910645},{"id":"https://openalex.org/C95623464","wikidata":"https://www.wikidata.org/wiki/Q1096149","display_name":"Classifier (UML)","level":2,"score":0.26910001039505005},{"id":"https://openalex.org/C80444323","wikidata":"https://www.wikidata.org/wiki/Q2878974","display_name":"Theoretical computer science","level":1,"score":0.2648000121116638},{"id":"https://openalex.org/C160633673","wikidata":"https://www.wikidata.org/wiki/Q355198","display_name":"Pixel","level":2,"score":0.25769999623298645},{"id":"https://openalex.org/C14036430","wikidata":"https://www.wikidata.org/wiki/Q3736076","display_name":"Function (biology)","level":2,"score":0.2558000087738037}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2605.22123","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2605.22123","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2605.22123","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2605.22123","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Designing":[0],"reward":[1,40,67,106,174,186],"functions":[2,68],"that":[3,63,91,108,125,145,170],"generalize":[4,51],"beyond":[5,52],"controlled":[6],"laboratory":[7],"settings":[8],"remains":[9],"a":[10,22,61,104,121,184],"fundamental":[11],"challenge":[12],"in":[13,27,195],"reinforcement":[14],"learning":[15],"for":[16],"robotics.":[17],"In":[18],"open-world":[19],"manipulation":[20,142],"problems,":[21],"single":[23,185],"task":[24,193],"can":[25],"appear":[26],"numerous":[28],"variants":[29,194],"through":[30],"different":[31],"object":[32,181],"instances,":[33],"positions,":[34],"and":[35,48,112,120,139,152,180],"camera":[36],"viewpoints.":[37],"Recent":[38],"vision-based":[39],"models":[41],"tend":[42],"to":[43,50,78,83,158,177,188],"memorize":[44],"specific":[45],"pixel":[46],"distributions":[47],"fail":[49],"their":[53],"training":[54],"conditions.":[55],"To":[56],"address":[57],"this,":[58],"we":[59],"propose":[60],"framework":[62,99],"learns":[64],"invariant":[65],"symbolic":[66],"from":[69,80,129],"as":[70,72],"few":[71],"five":[73],"demonstrations.":[74],"The":[75,98],"insight":[76],"is":[77],"shift":[79],"visual":[81,96],"feature-fitting":[82],"the":[84,171],"discovery":[85],"of":[86],"behavioral":[87],"invariants:":[88],"task-level":[89,110],"properties":[90],"remain":[92],"constant":[93],"across":[94,191],"diverse":[95,192],"instantiations.":[97],"has":[100],"two":[101],"coupled":[102],"components:":[103],"structural":[105],"formulation":[107],"encodes":[109],"strategies":[111],"physical":[113],"constraints":[114],"while":[115],"preserving":[116],"optimal":[117],"policy":[118,153,162],"invariance,":[119],"hybrid":[122],"symbolic-numerical":[123],"procedure":[124],"distills":[126],"these":[127],"invariants":[128],"demonstrations":[130],"without":[131],"online":[132],"interaction.":[133],"Experiments":[134],"on":[135],"eight":[136],"Meta-World":[137],"tasks":[138,143],"three":[140],"Franka":[141],"demonstrate":[144],"our":[146],"method":[147],"achieves":[148],"stronger":[149],"process":[150],"alignment":[151],"rollout":[154],"ranking":[155],"abilities":[156],"compared":[157],"baselines,":[159],"accelerating":[160],"downstream":[161],"learning.":[163],"Three":[164],"real-world":[165],"out-of-distribution":[166],"experiments":[167],"further":[168],"show":[169],"same":[172],"learned":[173],"generalizes":[175],"zero-shot":[176],"position,":[178],"viewpoint,":[179],"variations,":[182],"enabling":[183],"representation":[187],"be":[189],"reused":[190],"practice.":[196]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2026-05-23T00:00:00"}
