{"id":"https://openalex.org/W4294672677","doi":"https://doi.org/10.1109/rcar54675.2022.9872291","title":"Celebrating Robustness in Efficient Off-Policy Meta-Reinforcement Learning","display_name":"Celebrating Robustness in Efficient Off-Policy Meta-Reinforcement Learning","publication_year":2022,"publication_date":"2022-07-17","ids":{"openalex":"https://openalex.org/W4294672677","doi":"https://doi.org/10.1109/rcar54675.2022.9872291"},"language":"en","primary_location":{"id":"doi:10.1109/rcar54675.2022.9872291","is_oa":false,"landing_page_url":"https://doi.org/10.1109/rcar54675.2022.9872291","pdf_url":null,"source":{"id":"https://openalex.org/S4363608312","display_name":"2022 IEEE International Conference on Real-time Computing and Robotics (RCAR)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2022 IEEE International Conference on Real-time Computing and Robotics (RCAR)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5100383203","display_name":"Ziyi Liu","orcid":"https://orcid.org/0000-0002-2407-9599"},"institutions":[{"id":"https://openalex.org/I205237279","display_name":"Nankai University","ror":"https://ror.org/01y1kjr75","country_code":"CN","type":"education","lineage":["https://openalex.org/I205237279"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Ziyi Liu","raw_affiliation_strings":["NanKai University,Institute of Robotics and Automatic Information System and the Tianjin Key Laboratory of Intelligent Robotics, College of Artificial Intelligence,Tianjin,China,300350"],"affiliations":[{"raw_affiliation_string":"NanKai University,Institute of Robotics and Automatic Information System and the Tianjin Key Laboratory of Intelligent Robotics, College of Artificial Intelligence,Tianjin,China,300350","institution_ids":["https://openalex.org/I205237279"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5032289171","display_name":"Zongyuan Li","orcid":null},"institutions":[{"id":"https://openalex.org/I205237279","display_name":"Nankai University","ror":"https://ror.org/01y1kjr75","country_code":"CN","type":"education","lineage":["https://openalex.org/I205237279"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Zongyuan Li","raw_affiliation_strings":["NanKai University,Institute of Robotics and Automatic Information System and the Tianjin Key Laboratory of Intelligent Robotics, College of Artificial Intelligence,Tianjin,China,300350"],"affiliations":[{"raw_affiliation_string":"NanKai University,Institute of Robotics and Automatic Information System and the Tianjin Key Laboratory of Intelligent Robotics, College of Artificial Intelligence,Tianjin,China,300350","institution_ids":["https://openalex.org/I205237279"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101790517","display_name":"Qianqian Cao","orcid":"https://orcid.org/0000-0002-1207-4002"},"institutions":[{"id":"https://openalex.org/I205237279","display_name":"Nankai University","ror":"https://ror.org/01y1kjr75","country_code":"CN","type":"education","lineage":["https://openalex.org/I205237279"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Qianqian Cao","raw_affiliation_strings":["NanKai University,Institute of Robotics and Automatic Information System and the Tianjin Key Laboratory of Intelligent Robotics, College of Artificial Intelligence,Tianjin,China,300350"],"affiliations":[{"raw_affiliation_string":"NanKai University,Institute of Robotics and Automatic Information System and the Tianjin Key Laboratory of Intelligent Robotics, College of Artificial Intelligence,Tianjin,China,300350","institution_ids":["https://openalex.org/I205237279"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101635851","display_name":"Yuan Wan","orcid":"https://orcid.org/0000-0002-2227-7583"},"institutions":[{"id":"https://openalex.org/I196699116","display_name":"Wuhan University of Technology","ror":"https://ror.org/03fe7t173","country_code":"CN","type":"education","lineage":["https://openalex.org/I196699116"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yuan Wan","raw_affiliation_strings":["Wuhan University of Technology,Department of Mathematical,WuHan,China,430070"],"affiliations":[{"raw_affiliation_string":"Wuhan University of Technology,Department of Mathematical,WuHan,China,430070","institution_ids":["https://openalex.org/I196699116"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5078971977","display_name":"Xian Guo","orcid":"https://orcid.org/0000-0003-3793-1207"},"institutions":[{"id":"https://openalex.org/I205237279","display_name":"Nankai University","ror":"https://ror.org/01y1kjr75","country_code":"CN","type":"education","lineage":["https://openalex.org/I205237279"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xian Guo","raw_affiliation_strings":["NanKai University,Institute of Robotics and Automatic Information System and the Tianjin Key Laboratory of Intelligent Robotics, College of Artificial Intelligence,Tianjin,China,300350"],"affiliations":[{"raw_affiliation_string":"NanKai University,Institute of Robotics and Automatic Information System and the Tianjin Key Laboratory of Intelligent Robotics, College of Artificial Intelligence,Tianjin,China,300350","institution_ids":["https://openalex.org/I205237279"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5100383203"],"corresponding_institution_ids":["https://openalex.org/I205237279"],"apc_list":null,"apc_paid":null,"fwci":0.1039,"has_fulltext":false,"cited_by_count":1,"citation_normalized_percentile":{"value":0.28339797,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":94},"biblio":{"volume":"550","issue":null,"first_page":"499","last_page":"504"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11307","display_name":"Domain Adaptation and Few-Shot Learning","score":0.9973000288009644,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11689","display_name":"Adversarial Robustness in Machine Learning","score":0.996399998664856,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.9063155651092529},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8078563809394836},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.6631431579589844},{"id":"https://openalex.org/keywords/robustness","display_name":"Robustness (evolution)","score":0.642341136932373},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.6407104730606079},{"id":"https://openalex.org/keywords/probabilistic-logic","display_name":"Probabilistic logic","score":0.5488297939300537},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.4892308712005615},{"id":"https://openalex.org/keywords/policy-learning","display_name":"Policy learning","score":0.48741820454597473},{"id":"https://openalex.org/keywords/engineering","display_name":"Engineering","score":0.06987157464027405}],"concepts":[{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.9063155651092529},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8078563809394836},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6631431579589844},{"id":"https://openalex.org/C63479239","wikidata":"https://www.wikidata.org/wiki/Q7353546","display_name":"Robustness (evolution)","level":3,"score":0.642341136932373},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.6407104730606079},{"id":"https://openalex.org/C49937458","wikidata":"https://www.wikidata.org/wiki/Q2599292","display_name":"Probabilistic logic","level":2,"score":0.5488297939300537},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.4892308712005615},{"id":"https://openalex.org/C2779436431","wikidata":"https://www.wikidata.org/wiki/Q30672407","display_name":"Policy learning","level":2,"score":0.48741820454597473},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.06987157464027405},{"id":"https://openalex.org/C201995342","wikidata":"https://www.wikidata.org/wiki/Q682496","display_name":"Systems engineering","level":1,"score":0.0},{"id":"https://openalex.org/C104317684","wikidata":"https://www.wikidata.org/wiki/Q7187","display_name":"Gene","level":2,"score":0.0},{"id":"https://openalex.org/C185592680","wikidata":"https://www.wikidata.org/wiki/Q2329","display_name":"Chemistry","level":0,"score":0.0},{"id":"https://openalex.org/C55493867","wikidata":"https://www.wikidata.org/wiki/Q7094","display_name":"Biochemistry","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/rcar54675.2022.9872291","is_oa":false,"landing_page_url":"https://doi.org/10.1109/rcar54675.2022.9872291","pdf_url":null,"source":{"id":"https://openalex.org/S4363608312","display_name":"2022 IEEE International Conference on Real-time Computing and Robotics (RCAR)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2022 IEEE International Conference on Real-time Computing and Robotics (RCAR)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":54,"referenced_works":["https://openalex.org/W834081922","https://openalex.org/W1771410628","https://openalex.org/W1849277567","https://openalex.org/W1959608418","https://openalex.org/W2145339207","https://openalex.org/W2155007355","https://openalex.org/W2194775991","https://openalex.org/W2604763608","https://openalex.org/W2606433045","https://openalex.org/W2734377693","https://openalex.org/W2753738274","https://openalex.org/W2766447205","https://openalex.org/W2785342287","https://openalex.org/W2787501667","https://openalex.org/W2788904251","https://openalex.org/W2904246096","https://openalex.org/W2952526277","https://openalex.org/W2963341924","https://openalex.org/W2963446712","https://openalex.org/W2963864421","https://openalex.org/W2982641032","https://openalex.org/W2995049146","https://openalex.org/W2997574889","https://openalex.org/W3035216917","https://openalex.org/W4287632120","https://openalex.org/W4289388948","https://openalex.org/W4293469690","https://openalex.org/W4300001399","https://openalex.org/W4300971732","https://openalex.org/W6623316541","https://openalex.org/W6638018090","https://openalex.org/W6639204139","https://openalex.org/W6640963894","https://openalex.org/W6681096077","https://openalex.org/W6682849425","https://openalex.org/W6684921986","https://openalex.org/W6717697761","https://openalex.org/W6725739302","https://openalex.org/W6729433768","https://openalex.org/W6729906282","https://openalex.org/W6736057607","https://openalex.org/W6736368053","https://openalex.org/W6740756965","https://openalex.org/W6745921498","https://openalex.org/W6747943641","https://openalex.org/W6748566876","https://openalex.org/W6748600884","https://openalex.org/W6755476724","https://openalex.org/W6757592117","https://openalex.org/W6760698134","https://openalex.org/W6768532476","https://openalex.org/W6768602481","https://openalex.org/W6779441025","https://openalex.org/W6785022050"],"related_works":["https://openalex.org/W4306904969","https://openalex.org/W2138720691","https://openalex.org/W4362501864","https://openalex.org/W4380318855","https://openalex.org/W2031695474","https://openalex.org/W3084456289","https://openalex.org/W2024136090","https://openalex.org/W4391331176","https://openalex.org/W2586732548","https://openalex.org/W3049728571"],"abstract_inverted_index":{"Deep":[0],"reinforcement":[1,56],"learning":[2,36,57],"algorithms":[3,38,58],"can":[4,28,39],"enable":[5,40],"agents":[6,41],"to":[7,23,31,42,82,117,126],"learn":[8],"policies":[9,19],"for":[10],"complex":[11],"tasks":[12,45,75,132],"without":[13],"expert":[14],"knowledge.":[15],"However,":[16],"the":[17],"learned":[18],"are":[20],"typically":[21],"specialized":[22],"one":[24],"specific":[25],"task":[26],"and":[27,67,76,133,146],"not":[29,69],"generalize":[30],"new":[32,44],"tasks.":[33],"While":[34],"meta-reinforcement":[35],"(meta-RL)":[37],"solve":[43],"based":[46],"on":[47,54,149],"prior":[48],"experience,":[49],"most":[50],"of":[51,63],"them":[52],"build":[53],"on-policy":[55],"which":[59],"require":[60],"large":[61],"amounts":[62],"samples":[64],"during":[65],"meta-training":[66],"do":[68],"consider":[70],"task-specific":[71,110],"features":[72],"across":[73,130],"different":[74,131],"thus":[77],"make":[78],"it":[79],"very":[80],"difficult":[81],"train":[83],"an":[84,98,114],"agent":[85],"with":[86],"high":[87],"performance.":[88],"To":[89],"address":[90],"these":[91],"challenges,":[92],"in":[93,143],"this":[94],"paper,":[95],"we":[96],"propose":[97],"off-policy":[99],"meta-RL":[100,151],"algorithm":[101],"abbreviated":[102],"as":[103],"CRL":[104],"(Celebrating":[105],"Robustness":[106],"Learning)":[107],"that":[108],"disentangles":[109],"policy":[111],"parameters":[112],"by":[113],"adapter":[115],"network":[116],"shared":[118],"low-level":[119],"parameters,":[120],"learns":[121],"a":[122],"probabilistic":[123],"latent":[124],"space":[125],"extract":[127],"universal":[128],"information":[129],"perform":[134],"temporal-extended":[135],"exploration.":[136],"Our":[137],"approach":[138],"outperforms":[139],"baseline":[140],"methods":[141],"both":[142],"sample":[144],"efficiency":[145],"asymptotic":[147],"performance":[148],"several":[150],"benchmarks.":[152]},"counts_by_year":[{"year":2024,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
