{"id":"https://openalex.org/W4409116005","doi":"https://doi.org/10.1561/2200000080","title":"A Tutorial on Meta-Reinforcement Learning","display_name":"A Tutorial on Meta-Reinforcement Learning","publication_year":2025,"publication_date":"2025-04-03","ids":{"openalex":"https://openalex.org/W4409116005","doi":"https://doi.org/10.1561/2200000080"},"language":"en","primary_location":{"id":"doi:10.1561/2200000080","is_oa":false,"landing_page_url":"https://doi.org/10.1561/2200000080","pdf_url":null,"source":{"id":"https://openalex.org/S4210188176","display_name":"Foundations and Trends\u00ae in Machine Learning","issn_l":"1935-8237","issn":["1935-8237","1935-8245"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310318575","host_organization_name":"Now Publishers","host_organization_lineage":["https://openalex.org/P4310318575"],"host_organization_lineage_names":["Now Publishers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Foundations and Trends\u00ae in Machine Learning","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5025141355","display_name":"Jacob Beck","orcid":"https://orcid.org/0000-0002-2991-3124"},"institutions":[{"id":"https://openalex.org/I40120149","display_name":"University of Oxford","ror":"https://ror.org/052gg0110","country_code":"GB","type":"education","lineage":["https://openalex.org/I40120149"]}],"countries":["GB"],"is_corresponding":true,"raw_author_name":"Jacob Beck","raw_affiliation_strings":["University of Oxford ,"],"affiliations":[{"raw_affiliation_string":"University of Oxford ,","institution_ids":["https://openalex.org/I40120149"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5001815216","display_name":"Risto Vuorio","orcid":null},"institutions":[{"id":"https://openalex.org/I40120149","display_name":"University of Oxford","ror":"https://ror.org/052gg0110","country_code":"GB","type":"education","lineage":["https://openalex.org/I40120149"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Risto Vuorio","raw_affiliation_strings":["University of Oxford ,"],"affiliations":[{"raw_affiliation_string":"University of Oxford ,","institution_ids":["https://openalex.org/I40120149"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5045100443","display_name":"Evan Zheran Liu","orcid":"https://orcid.org/0009-0001-4411-7599"},"institutions":[{"id":"https://openalex.org/I97018004","display_name":"Stanford University","ror":"https://ror.org/00f54p054","country_code":"US","type":"education","lineage":["https://openalex.org/I97018004"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Evan Zheran Liu","raw_affiliation_strings":["Stanford University ,"],"affiliations":[{"raw_affiliation_string":"Stanford University ,","institution_ids":["https://openalex.org/I97018004"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5114948990","display_name":"Zheng Xiong","orcid":"https://orcid.org/0009-0002-3453-1478"},"institutions":[{"id":"https://openalex.org/I40120149","display_name":"University of Oxford","ror":"https://ror.org/052gg0110","country_code":"GB","type":"education","lineage":["https://openalex.org/I40120149"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Zheng Xiong","raw_affiliation_strings":["University of Oxford ,"],"affiliations":[{"raw_affiliation_string":"University of Oxford ,","institution_ids":["https://openalex.org/I40120149"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5023346681","display_name":"Luisa Zintgraf","orcid":"https://orcid.org/0009-0003-5864-7632"},"institutions":[{"id":"https://openalex.org/I40120149","display_name":"University of Oxford","ror":"https://ror.org/052gg0110","country_code":"GB","type":"education","lineage":["https://openalex.org/I40120149"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Luisa Zintgraf","raw_affiliation_strings":["University of Oxford ,"],"affiliations":[{"raw_affiliation_string":"University of Oxford ,","institution_ids":["https://openalex.org/I40120149"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5005431772","display_name":"Chelsea Finn","orcid":"https://orcid.org/0000-0001-6298-0874"},"institutions":[{"id":"https://openalex.org/I97018004","display_name":"Stanford University","ror":"https://ror.org/00f54p054","country_code":"US","type":"education","lineage":["https://openalex.org/I97018004"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Chelsea Finn","raw_affiliation_strings":["Stanford University ,"],"affiliations":[{"raw_affiliation_string":"Stanford University ,","institution_ids":["https://openalex.org/I97018004"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5056879203","display_name":"Shimon Whiteson","orcid":null},"institutions":[{"id":"https://openalex.org/I40120149","display_name":"University of Oxford","ror":"https://ror.org/052gg0110","country_code":"GB","type":"education","lineage":["https://openalex.org/I40120149"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Shimon Whiteson","raw_affiliation_strings":["University of Oxford ,"],"affiliations":[{"raw_affiliation_string":"University of Oxford ,","institution_ids":["https://openalex.org/I40120149"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":7,"corresponding_author_ids":["https://openalex.org/A5025141355"],"corresponding_institution_ids":["https://openalex.org/I40120149"],"apc_list":null,"apc_paid":null,"fwci":53.3562,"has_fulltext":false,"cited_by_count":22,"citation_normalized_percentile":{"value":0.99830107,"is_in_top_1_percent":true,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":99,"max":100},"biblio":{"volume":"18","issue":"2-3","first_page":"224","last_page":"384"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11975","display_name":"Evolutionary Algorithms and Applications","score":0.16439999639987946,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11975","display_name":"Evolutionary Algorithms and Applications","score":0.16439999639987946,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.5794687271118164},{"id":"https://openalex.org/keywords/reinforcement","display_name":"Reinforcement","score":0.5744931697845459},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.5114027857780457},{"id":"https://openalex.org/keywords/mathematics-education","display_name":"Mathematics education","score":0.3331736624240875},{"id":"https://openalex.org/keywords/psychology","display_name":"Psychology","score":0.2712799906730652},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.254403293132782},{"id":"https://openalex.org/keywords/social-psychology","display_name":"Social psychology","score":0.07510045170783997}],"concepts":[{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.5794687271118164},{"id":"https://openalex.org/C67203356","wikidata":"https://www.wikidata.org/wiki/Q1321905","display_name":"Reinforcement","level":2,"score":0.5744931697845459},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.5114027857780457},{"id":"https://openalex.org/C145420912","wikidata":"https://www.wikidata.org/wiki/Q853077","display_name":"Mathematics education","level":1,"score":0.3331736624240875},{"id":"https://openalex.org/C15744967","wikidata":"https://www.wikidata.org/wiki/Q9418","display_name":"Psychology","level":0,"score":0.2712799906730652},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.254403293132782},{"id":"https://openalex.org/C77805123","wikidata":"https://www.wikidata.org/wiki/Q161272","display_name":"Social psychology","level":1,"score":0.07510045170783997}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1561/2200000080","is_oa":false,"landing_page_url":"https://doi.org/10.1561/2200000080","pdf_url":null,"source":{"id":"https://openalex.org/S4210188176","display_name":"Foundations and Trends\u00ae in Machine Learning","issn_l":"1935-8237","issn":["1935-8237","1935-8245"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310318575","host_organization_name":"Now Publishers","host_organization_lineage":["https://openalex.org/P4310318575"],"host_organization_lineage_names":["Now Publishers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Foundations and Trends\u00ae in Machine Learning","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":38,"referenced_works":["https://openalex.org/W158468099","https://openalex.org/W1486056878","https://openalex.org/W1542791059","https://openalex.org/W2039522160","https://openalex.org/W2149586740","https://openalex.org/W2150468603","https://openalex.org/W2162888803","https://openalex.org/W2236244207","https://openalex.org/W2257979135","https://openalex.org/W2580909119","https://openalex.org/W2901645090","https://openalex.org/W2949369413","https://openalex.org/W2963703448","https://openalex.org/W2964105864","https://openalex.org/W3007769740","https://openalex.org/W3008998212","https://openalex.org/W3009295642","https://openalex.org/W3020718655","https://openalex.org/W3034942609","https://openalex.org/W3090612618","https://openalex.org/W3092053846","https://openalex.org/W3107153805","https://openalex.org/W3130048609","https://openalex.org/W3130076912","https://openalex.org/W3130473386","https://openalex.org/W3130568950","https://openalex.org/W3133214370","https://openalex.org/W3134395475","https://openalex.org/W3174796182","https://openalex.org/W3175254947","https://openalex.org/W3199421945","https://openalex.org/W3200809500","https://openalex.org/W4205430897","https://openalex.org/W4226463308","https://openalex.org/W4238893454","https://openalex.org/W4241811150","https://openalex.org/W4285214238","https://openalex.org/W4315487473"],"related_works":["https://openalex.org/W4391375266","https://openalex.org/W2899084033","https://openalex.org/W2748952813","https://openalex.org/W4310083477","https://openalex.org/W2328553770","https://openalex.org/W2920061524","https://openalex.org/W1977959518","https://openalex.org/W2038908348","https://openalex.org/W2107890255","https://openalex.org/W2106552856"],"abstract_inverted_index":{"While":[0],"deep":[1,180],"reinforcement":[2],"learning":[3,55,143],"(RL)":[4],"has":[5],"fueled":[6],"multiple":[7],"high-profile":[8],"successes":[9],"in":[10,58,68,113],"machine":[11,54],"learning,":[12],"it":[13,34],"is":[14,43,64,80,86],"held":[15],"back":[16],"from":[17,94],"more":[18],"widespread":[19],"adoption":[20],"by":[21,162],"its":[22,118],"often":[23],"poor":[24],"data":[25,101],"efficiency":[26],"and":[27,141,158],"the":[28,32,46,78,95,109,135,142,164,168,175],"limited":[29],"generality":[30],"of":[31,48,76,88,137,174],"policies":[33],"produces.":[35],"A":[36],"promising":[37],"approach":[38],"for":[39,146,178],"alleviating":[40],"these":[41,151],"limitations":[42],"to":[44,81,90,170],"cast":[45],"development":[47],"better":[49],"RL":[50,181],"algorithms":[51,157],"as":[52,99,102,115,117],"a":[53,59,69,74,83,125,138,179],"problem":[56,70,111],"itself":[57],"process":[60],"called":[61],"meta-RL.":[62],"Meta-RL":[63],"most":[65],"commonly":[66],"studied":[67],"setting":[71,112],"where,":[72],"given":[73],"distribution":[75,97,140],"tasks,":[77],"goal":[79],"learn":[82],"policy":[84],"that":[85],"capable":[87],"adapting":[89],"any":[91],"new":[92],"task":[93,96,139],"with":[98],"little":[100],"possible.":[103],"In":[104],"this":[105],"survey,":[106],"we":[107,153],"describe":[108],"meta-RL":[110,128,156,172],"detail":[114],"well":[116],"major":[119],"variations.":[120],"We":[121,160],"discuss":[122],"how,":[123],"at":[124],"high":[126],"level,":[127],"research":[129],"can":[130],"be":[131],"clustered":[132],"based":[133],"on":[134,167],"presence":[136],"budget":[144],"available":[145],"each":[147],"individual":[148],"task.":[149],"Using":[150],"clusters,":[152],"then":[154],"survey":[155],"applications.":[159],"conclude":[161],"presenting":[163],"open":[165],"problems":[166],"path":[169],"making":[171],"part":[173],"standard":[176],"toolbox":[177],"practitioner.":[182]},"counts_by_year":[{"year":2026,"cited_by_count":6},{"year":2025,"cited_by_count":16}],"updated_date":"2026-04-02T15:55:50.835912","created_date":"2025-10-10T00:00:00"}
