{"id":"https://openalex.org/W7134814748","doi":"https://doi.org/10.1109/lra.2026.3671536","title":"Value Explicit Pretraining for Learning Transferable Representations","display_name":"Value Explicit Pretraining for Learning Transferable Representations","publication_year":2026,"publication_date":"2026-03-09","ids":{"openalex":"https://openalex.org/W7134814748","doi":"https://doi.org/10.1109/lra.2026.3671536"},"language":null,"primary_location":{"id":"doi:10.1109/lra.2026.3671536","is_oa":false,"landing_page_url":"https://doi.org/10.1109/lra.2026.3671536","pdf_url":null,"source":{"id":"https://openalex.org/S4210169774","display_name":"IEEE Robotics and Automation Letters","issn_l":"2377-3766","issn":["2377-3766"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Robotics and Automation Letters","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5024597534","display_name":"Kiran Lekkala","orcid":null},"institutions":[{"id":"https://openalex.org/I1174212","display_name":"University of Southern California","ror":"https://ror.org/03taz7m60","country_code":"US","type":"education","lineage":["https://openalex.org/I1174212"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Kiran Lekkala","raw_affiliation_strings":["Thomas Lord Department of Computer Science, University of Southern California, Los Angeles, CA, USA"],"raw_orcid":"https://orcid.org/0000-0003-0731-3918","affiliations":[{"raw_affiliation_string":"Thomas Lord Department of Computer Science, University of Southern California, Los Angeles, CA, USA","institution_ids":["https://openalex.org/I1174212"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5113325593","display_name":"Henghui Bao","orcid":null},"institutions":[{"id":"https://openalex.org/I1174212","display_name":"University of Southern California","ror":"https://ror.org/03taz7m60","country_code":"US","type":"education","lineage":["https://openalex.org/I1174212"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Henghui Bao","raw_affiliation_strings":["Thomas Lord Department of Computer Science, University of Southern California, Los Angeles, CA, USA"],"raw_orcid":"https://orcid.org/0009-0007-9124-5079","affiliations":[{"raw_affiliation_string":"Thomas Lord Department of Computer Science, University of Southern California, Los Angeles, CA, USA","institution_ids":["https://openalex.org/I1174212"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5017710939","display_name":"Sumedh Sontakke","orcid":null},"institutions":[{"id":"https://openalex.org/I1174212","display_name":"University of Southern California","ror":"https://ror.org/03taz7m60","country_code":"US","type":"education","lineage":["https://openalex.org/I1174212"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Sumedh A. Sontakke","raw_affiliation_strings":["Thomas Lord Department of Computer Science, University of Southern California, Los Angeles, CA, USA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Thomas Lord Department of Computer Science, University of Southern California, Los Angeles, CA, USA","institution_ids":["https://openalex.org/I1174212"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Erdem Byk","orcid":"https://orcid.org/0000-0002-9516-3130"},"institutions":[{"id":"https://openalex.org/I1174212","display_name":"University of Southern California","ror":"https://ror.org/03taz7m60","country_code":"US","type":"education","lineage":["https://openalex.org/I1174212"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Erdem Byk","raw_affiliation_strings":["Thomas Lord Department of Computer Science, University of Southern California, Los Angeles, CA, USA"],"raw_orcid":"https://orcid.org/0000-0002-9516-3130","affiliations":[{"raw_affiliation_string":"Thomas Lord Department of Computer Science, University of Southern California, Los Angeles, CA, USA","institution_ids":["https://openalex.org/I1174212"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5054494771","display_name":"Laurent Itti","orcid":"https://orcid.org/0000-0002-0168-2977"},"institutions":[{"id":"https://openalex.org/I1174212","display_name":"University of Southern California","ror":"https://ror.org/03taz7m60","country_code":"US","type":"education","lineage":["https://openalex.org/I1174212"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Laurent Itti","raw_affiliation_strings":["Thomas Lord Department of Computer Science, University of Southern California, Los Angeles, CA, USA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Thomas Lord Department of Computer Science, University of Southern California, Los Angeles, CA, USA","institution_ids":["https://openalex.org/I1174212"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5024597534"],"corresponding_institution_ids":["https://openalex.org/I1174212"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.44538291,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":"11","issue":"5","first_page":"5749","last_page":"5756"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.22599999606609344,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.22599999606609344,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.15080000460147858,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11307","display_name":"Domain Adaptation and Few-Shot Learning","score":0.12960000336170197,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.6313999891281128},{"id":"https://openalex.org/keywords/encoder","display_name":"Encoder","score":0.5475999712944031},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.5404999852180481},{"id":"https://openalex.org/keywords/benchmark","display_name":"Benchmark (surveying)","score":0.47749999165534973},{"id":"https://openalex.org/keywords/invariant","display_name":"Invariant (physics)","score":0.45509999990463257},{"id":"https://openalex.org/keywords/task-analysis","display_name":"Task analysis","score":0.39399999380111694},{"id":"https://openalex.org/keywords/temporal-difference-learning","display_name":"Temporal difference learning","score":0.35249999165534973},{"id":"https://openalex.org/keywords/key","display_name":"Key (lock)","score":0.3434999883174896}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7138000130653381},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6592000126838684},{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.6313999891281128},{"id":"https://openalex.org/C118505674","wikidata":"https://www.wikidata.org/wiki/Q42586063","display_name":"Encoder","level":2,"score":0.5475999712944031},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.5404999852180481},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.47749999165534973},{"id":"https://openalex.org/C190470478","wikidata":"https://www.wikidata.org/wiki/Q2370229","display_name":"Invariant (physics)","level":2,"score":0.45509999990463257},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.4339999854564667},{"id":"https://openalex.org/C175154964","wikidata":"https://www.wikidata.org/wiki/Q380077","display_name":"Task analysis","level":3,"score":0.39399999380111694},{"id":"https://openalex.org/C196340769","wikidata":"https://www.wikidata.org/wiki/Q7698910","display_name":"Temporal difference learning","level":3,"score":0.35249999165534973},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.3434999883174896},{"id":"https://openalex.org/C150899416","wikidata":"https://www.wikidata.org/wiki/Q1820378","display_name":"Transfer of learning","level":2,"score":0.3386000096797943},{"id":"https://openalex.org/C59404180","wikidata":"https://www.wikidata.org/wiki/Q17013334","display_name":"Feature learning","level":2,"score":0.334199994802475},{"id":"https://openalex.org/C198531522","wikidata":"https://www.wikidata.org/wiki/Q485146","display_name":"Sample (material)","level":2,"score":0.328900009393692},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.32089999318122864},{"id":"https://openalex.org/C101738243","wikidata":"https://www.wikidata.org/wiki/Q786435","display_name":"Autoencoder","level":3,"score":0.3167000114917755},{"id":"https://openalex.org/C2776291640","wikidata":"https://www.wikidata.org/wiki/Q2912517","display_name":"Value (mathematics)","level":2,"score":0.313400000333786},{"id":"https://openalex.org/C178253425","wikidata":"https://www.wikidata.org/wiki/Q162668","display_name":"Visual perception","level":3,"score":0.2994000017642975},{"id":"https://openalex.org/C28006648","wikidata":"https://www.wikidata.org/wiki/Q6934509","display_name":"Multi-task learning","level":3,"score":0.28369998931884766},{"id":"https://openalex.org/C2776359362","wikidata":"https://www.wikidata.org/wiki/Q2145286","display_name":"Representation (politics)","level":3,"score":0.2816999852657318},{"id":"https://openalex.org/C2779321571","wikidata":"https://www.wikidata.org/wiki/Q7936605","display_name":"Visual learning","level":2,"score":0.27090001106262207},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.2687000036239624},{"id":"https://openalex.org/C36464697","wikidata":"https://www.wikidata.org/wiki/Q451553","display_name":"Visualization","level":2,"score":0.2612999975681305},{"id":"https://openalex.org/C87698059","wikidata":"https://www.wikidata.org/wiki/Q1808960","display_name":"LTI system theory","level":3,"score":0.2565000057220459}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/lra.2026.3671536","is_oa":false,"landing_page_url":"https://doi.org/10.1109/lra.2026.3671536","pdf_url":null,"source":{"id":"https://openalex.org/S4210169774","display_name":"IEEE Robotics and Automation Letters","issn_l":"2377-3766","issn":["2377-3766"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Robotics and Automation Letters","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G5421623203","display_name":null,"funder_award_id":"R61EY037527","funder_id":"https://openalex.org/F4320337350","funder_display_name":"National Eye Institute"},{"id":"https://openalex.org/G7671757787","display_name":null,"funder_award_id":"2318101","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"}],"funders":[{"id":"https://openalex.org/F4320306076","display_name":"National Science Foundation","ror":"https://ror.org/021nxhr62"},{"id":"https://openalex.org/F4320337350","display_name":"National Eye Institute","ror":"https://ror.org/03wkg3b53"},{"id":"https://openalex.org/F4320338291","display_name":"Sandia National Laboratories","ror":"https://ror.org/01apwpt12"}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":6,"referenced_works":["https://openalex.org/W2096733369","https://openalex.org/W2769112066","https://openalex.org/W2787666871","https://openalex.org/W4385431288","https://openalex.org/W4405786804","https://openalex.org/W7133201494"],"related_works":[],"abstract_inverted_index":{"Understanding":[0],"visual":[1,16],"inputs":[2],"for":[3,34],"a":[4,11,28,90,157],"given":[5],"task":[6,115],"amidst":[7],"varied":[8],"changes":[9,64],"is":[10,112,139],"key":[12],"challenge":[13],"posed":[14],"by":[15,53],"reinforcement":[17,36],"learning":[18,41,54],"agents.":[19],"We":[20],"propose":[21],"<italic":[22,75],"xmlns:mml=\"http://www.w3.org/1998/Math/MathML\"":[23,76,186,196],"xmlns:xlink=\"http://www.w3.org/1999/xlink\">Value":[24],"Explicit":[25],"Pretraining</i>":[26],"(VEP),":[27],"method":[29,134],"that":[30,45,57,94,111,122,146,166],"learns":[31],"generalizable":[32],"representations":[33,59,121],"transfer":[35],"learning.":[37],"VEP":[38,167,181,206],"enables":[39,95],"efficient":[40],"of":[42,82,114,126,142,205],"new":[43],"tasks":[44,103],"share":[46],"similar":[47],"objectives":[48],"as":[49],"previously":[50],"learned":[51],"tasks,":[52],"an":[55],"encoder":[56,73],"trains":[58],"to":[60,63,98,176,178,184,194],"be":[61],"invariant":[62],"in":[65,118,190,200],"environment":[66],"dynamics":[67],"and":[68,84,135,161,192],"appearance.":[69],"To":[70],"pretrain":[71],"the":[72,96,106,124,127,136,140,151,162,174],"with":[74],"xmlns:xlink=\"http://www.w3.org/1999/xlink\">suboptimal":[77],"unlabeled":[78,144],"demonstration":[79],"data</i>":[80],"(sequence":[81],"observations":[83],"sparse":[85],"reward":[86],"signals),":[87],"we":[88],"use":[89,141],"self-supervised":[91],"contrastive":[92],"loss":[93],"model":[97],"relate":[99],"states":[100],"across":[101],"different":[102],"based":[104],"on":[105,154,173],"Monte":[107],"Carlo":[108],"value":[109],"estimate":[110],"reflective":[113],"progress,":[116],"resulting":[117],"temporally":[119],"smooth":[120],"capture":[123],"objective":[125],"task.":[128,152],"A":[129],"major":[130],"difference":[131],"between":[132],"our":[133,209],"existing":[137],"approaches":[138],"suboptimal":[143],"data":[145],"do":[147],"not":[148],"always":[149],"solve":[150],"Experiments":[153],"Ant":[155],"locomotion,":[156],"realistic":[158],"navigation":[159],"simulator":[160],"Atari":[163],"benchmark":[164],"show":[165],"outperforms":[168],"current":[169],"SoTA":[170],"pretraining":[171],"methods":[172],"ability":[175],"generalize":[177],"unseen":[179],"tasks.":[180],"achieves":[182],"up":[183,193],"<inline-formula":[185,195],"xmlns:xlink=\"http://www.w3.org/1999/xlink\"><tex-math":[187,197],"notation=\"LaTeX\">$2\\times$</tex-math></inline-formula>":[188],"improvement":[189,199],"rewards,":[191],"notation=\"LaTeX\">$3\\times$</tex-math></inline-formula>":[198],"sample":[201],"efficiency.":[202],"For":[203],"videos":[204],"policies,":[207],"visit":[208],"website.":[210]},"counts_by_year":[],"updated_date":"2026-05-05T08:41:31.759640","created_date":"2026-03-11T00:00:00"}
