{"id":"https://openalex.org/W3206104157","doi":"https://doi.org/10.1109/icra48506.2021.9561799","title":"Zero-shot Policy Learning with Spatial Temporal Reward Decomposition on Contingency-aware Observation","display_name":"Zero-shot Policy Learning with Spatial Temporal Reward Decomposition on Contingency-aware Observation","publication_year":2021,"publication_date":"2021-05-30","ids":{"openalex":"https://openalex.org/W3206104157","doi":"https://doi.org/10.1109/icra48506.2021.9561799","mag":"3206104157"},"language":"en","primary_location":{"id":"doi:10.1109/icra48506.2021.9561799","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icra48506.2021.9561799","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2021 IEEE International Conference on Robotics and Automation (ICRA)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5049093671","display_name":"Huazhe Xu","orcid":"https://orcid.org/0000-0001-8578-1261"},"institutions":[{"id":"https://openalex.org/I95457486","display_name":"University of California, Berkeley","ror":"https://ror.org/01an7q238","country_code":"US","type":"education","lineage":["https://openalex.org/I95457486"]},{"id":"https://openalex.org/I134446601","display_name":"Berkeley College","ror":"https://ror.org/02xewxa75","country_code":"US","type":"education","lineage":["https://openalex.org/I134446601"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Huazhe Xu","raw_affiliation_strings":["UC Berkeley"],"affiliations":[{"raw_affiliation_string":"UC Berkeley","institution_ids":["https://openalex.org/I134446601","https://openalex.org/I95457486"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5061701229","display_name":"Boyuan Chen","orcid":"https://orcid.org/0000-0001-9103-5820"},"institutions":[{"id":"https://openalex.org/I134446601","display_name":"Berkeley College","ror":"https://ror.org/02xewxa75","country_code":"US","type":"education","lineage":["https://openalex.org/I134446601"]},{"id":"https://openalex.org/I95457486","display_name":"University of California, Berkeley","ror":"https://ror.org/01an7q238","country_code":"US","type":"education","lineage":["https://openalex.org/I95457486"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Boyuan Chen","raw_affiliation_strings":["UC Berkeley"],"affiliations":[{"raw_affiliation_string":"UC Berkeley","institution_ids":["https://openalex.org/I134446601","https://openalex.org/I95457486"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5074250521","display_name":"Yang Gao","orcid":"https://orcid.org/0000-0002-2488-1813"},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yang Gao","raw_affiliation_strings":["Tsinghua University"],"affiliations":[{"raw_affiliation_string":"Tsinghua University","institution_ids":["https://openalex.org/I99065089"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5029105520","display_name":"Trevor Darrell","orcid":"https://orcid.org/0000-0001-5453-8533"},"institutions":[{"id":"https://openalex.org/I95457486","display_name":"University of California, Berkeley","ror":"https://ror.org/01an7q238","country_code":"US","type":"education","lineage":["https://openalex.org/I95457486"]},{"id":"https://openalex.org/I134446601","display_name":"Berkeley College","ror":"https://ror.org/02xewxa75","country_code":"US","type":"education","lineage":["https://openalex.org/I134446601"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Trevor Darrell","raw_affiliation_strings":["UC Berkeley"],"affiliations":[{"raw_affiliation_string":"UC Berkeley","institution_ids":["https://openalex.org/I134446601","https://openalex.org/I95457486"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5049093671"],"corresponding_institution_ids":["https://openalex.org/I134446601","https://openalex.org/I95457486"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.14729221,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":"99","issue":null,"first_page":"10786","last_page":"10792"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9962000250816345,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9962000250816345,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11307","display_name":"Domain Adaptation and Few-Shot Learning","score":0.9934999942779541,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11689","display_name":"Adversarial Robustness in Machine Learning","score":0.9836999773979187,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/contingency","display_name":"Contingency","score":0.6535806655883789},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6479872465133667},{"id":"https://openalex.org/keywords/decomposition","display_name":"Decomposition","score":0.6476436853408813},{"id":"https://openalex.org/keywords/zero","display_name":"Zero (linguistics)","score":0.5711647272109985},{"id":"https://openalex.org/keywords/shot","display_name":"Shot (pellet)","score":0.564351499080658},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5089490413665771},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.3260385990142822}],"concepts":[{"id":"https://openalex.org/C97508593","wikidata":"https://www.wikidata.org/wiki/Q1359069","display_name":"Contingency","level":2,"score":0.6535806655883789},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6479872465133667},{"id":"https://openalex.org/C124681953","wikidata":"https://www.wikidata.org/wiki/Q339062","display_name":"Decomposition","level":2,"score":0.6476436853408813},{"id":"https://openalex.org/C2780813799","wikidata":"https://www.wikidata.org/wiki/Q3274237","display_name":"Zero (linguistics)","level":2,"score":0.5711647272109985},{"id":"https://openalex.org/C2778344882","wikidata":"https://www.wikidata.org/wiki/Q278938","display_name":"Shot (pellet)","level":2,"score":0.564351499080658},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5089490413665771},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.3260385990142822},{"id":"https://openalex.org/C18903297","wikidata":"https://www.wikidata.org/wiki/Q7150","display_name":"Ecology","level":1,"score":0.0},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C178790620","wikidata":"https://www.wikidata.org/wiki/Q11351","display_name":"Organic chemistry","level":1,"score":0.0},{"id":"https://openalex.org/C185592680","wikidata":"https://www.wikidata.org/wiki/Q2329","display_name":"Chemistry","level":0,"score":0.0},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/icra48506.2021.9561799","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icra48506.2021.9561799","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2021 IEEE International Conference on Robotics and Automation (ICRA)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[{"id":"https://openalex.org/F4320317364","display_name":"BD","ror":"https://ror.org/048vrgr14"}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":87,"referenced_works":["https://openalex.org/W172298727","https://openalex.org/W1191599655","https://openalex.org/W1594849649","https://openalex.org/W1777239053","https://openalex.org/W1933657216","https://openalex.org/W1978956894","https://openalex.org/W1988526405","https://openalex.org/W2004915807","https://openalex.org/W2061562262","https://openalex.org/W2080379318","https://openalex.org/W2113023245","https://openalex.org/W2121103318","https://openalex.org/W2126105931","https://openalex.org/W2145339207","https://openalex.org/W2158969944","https://openalex.org/W2161009228","https://openalex.org/W2164469736","https://openalex.org/W2167224731","https://openalex.org/W2173248099","https://openalex.org/W2174803659","https://openalex.org/W2254120882","https://openalex.org/W2470789286","https://openalex.org/W2561776174","https://openalex.org/W2620290674","https://openalex.org/W2736601468","https://openalex.org/W2785693718","https://openalex.org/W2789008106","https://openalex.org/W2807193538","https://openalex.org/W2809013025","https://openalex.org/W2809050457","https://openalex.org/W2898436992","https://openalex.org/W2898802014","https://openalex.org/W2899077443","https://openalex.org/W2899205164","https://openalex.org/W2899508538","https://openalex.org/W2920722457","https://openalex.org/W2944746440","https://openalex.org/W2949475445","https://openalex.org/W2952011561","https://openalex.org/W2952578114","https://openalex.org/W2963124592","https://openalex.org/W2963126744","https://openalex.org/W2963523627","https://openalex.org/W2963611966","https://openalex.org/W2963689319","https://openalex.org/W2963864421","https://openalex.org/W2964001908","https://openalex.org/W2964067469","https://openalex.org/W2967293465","https://openalex.org/W2967895468","https://openalex.org/W2970387978","https://openalex.org/W2970705602","https://openalex.org/W3003474222","https://openalex.org/W3027397596","https://openalex.org/W4293872189","https://openalex.org/W4297744728","https://openalex.org/W4300799055","https://openalex.org/W4301141993","https://openalex.org/W6627932998","https://openalex.org/W6634004297","https://openalex.org/W6638088447","https://openalex.org/W6676728370","https://openalex.org/W6678367057","https://openalex.org/W6678637614","https://openalex.org/W6684338915","https://openalex.org/W6684921986","https://openalex.org/W6685664872","https://openalex.org/W6691666995","https://openalex.org/W6720060057","https://openalex.org/W6729788943","https://openalex.org/W6730641667","https://openalex.org/W6738165965","https://openalex.org/W6740801417","https://openalex.org/W6741002519","https://openalex.org/W6741977017","https://openalex.org/W6748306599","https://openalex.org/W6748599296","https://openalex.org/W6751569447","https://openalex.org/W6752275872","https://openalex.org/W6755654293","https://openalex.org/W6755655695","https://openalex.org/W6756103505","https://openalex.org/W6756257017","https://openalex.org/W6756303580","https://openalex.org/W6760325134","https://openalex.org/W6762548593","https://openalex.org/W6766413688"],"related_works":["https://openalex.org/W2074502265","https://openalex.org/W4214877189","https://openalex.org/W2773965352","https://openalex.org/W2381179799","https://openalex.org/W2980279061","https://openalex.org/W2334685461","https://openalex.org/W2366718574","https://openalex.org/W2359774528","https://openalex.org/W4298312966","https://openalex.org/W2325697621"],"abstract_inverted_index":{"It":[0],"is":[1,50,74,82],"a":[2,32,128,139,151,155,167,178,207,213,224],"long-standing":[3],"challenge":[4],"to":[5,10,17,84,176,185,192,231],"enable":[6],"an":[7,18],"intelligent":[8,42],"agent":[9,49],"learn":[11,166],"in":[12,56,64,76,132,150],"one":[13],"environment":[14,20],"and":[15,25,45,103,170],"generalize":[16],"unseen":[19],"without":[21,88],"further":[22,165],"data":[23],"collection":[24],"finetuning.":[26],"In":[27],"this":[28,97,125],"paper,":[29],"we":[30,164],"consider":[31],"zero":[33],"shot":[34],"generalization":[35,46],"problem":[36],"setup":[37],"that":[38,142,195],"complies":[39],"with":[40,53,61,91,117],"biological":[41,101],"agents\u2019":[43],"learning":[44,120],"processes.":[47],"The":[48],"first":[51],"presented":[52],"previous":[54,110],"experiences":[55,131],"the":[57,65,77,86,92,105,133,144,181,232],"training":[58,134],"environment,":[59,80,135],"along":[60,116],"task":[62,87],"description":[63],"form":[66],"of":[67,130,198,210],"trajectory-level":[68],"sparse":[69,145],"rewards.":[70],"Later":[71],"when":[72],"it":[73,81],"placed":[75],"new":[78,193],"testing":[79,93],"asked":[83],"perform":[85,122],"any":[89],"interaction":[90],"environment.":[94],"We":[95,202],"find":[96],"setting":[98],"natural":[99],"for":[100,109,235],"creatures":[102],"at":[104],"same":[106],"time,":[107],"challenging":[108],"methods.":[111],"Behavior":[112],"cloning,":[113],"state-of-art":[114],"RL":[115],"other":[118],"zero-shot":[119],"methods":[121],"poorly":[123],"on":[124,160,206],"benchmark.":[126],"Given":[127],"set":[129],"our":[136,204],"method":[137,205],"learns":[138],"neural":[140],"function":[141],"decomposes":[143],"reward":[146],"into":[147],"particular":[148],"regions":[149],"contingency-aware":[152],"observation":[153],"as":[154,221,223],"per":[156],"step":[157],"reward.":[158],"Based":[159],"such":[161],"decomposed":[162,184],"rewards,":[163],"dynamics":[168],"model":[169],"use":[171],"Model":[172],"Predictive":[173],"Control":[174],"(MPC)":[175],"obtain":[177],"policy.":[179],"Since":[180],"rewards":[182],"are":[183,189,196],"finer-granularity":[186],"observations,":[187],"they":[188],"naturally":[190],"generalizable":[191],"environments":[194],"composed":[197],"similar":[199],"basic":[200],"elements.":[201],"demonstrate":[203],"wide":[208],"range":[209],"environments,":[211],"including":[212],"classic":[214],"video":[215],"game":[216],"\u2013":[217],"Super":[218],"Mario":[219],"Bros,":[220],"well":[222],"robotic":[225],"continuous":[226],"control":[227],"task.":[228],"Please":[229],"refer":[230],"project":[233],"page":[234],"more":[236],"visualized":[237],"results.":[238],"<sup":[239],"xmlns:mml=\"http://www.w3.org/1998/Math/MathML\"":[240],"xmlns:xlink=\"http://www.w3.org/1999/xlink\">1</sup>":[241]},"counts_by_year":[],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
