{"id":"https://openalex.org/W2966461828","doi":"https://doi.org/10.24963/ijcai.2019/387","title":"Meta Reinforcement Learning with Task Embedding and Shared Policy","display_name":"Meta Reinforcement Learning with Task Embedding and Shared Policy","publication_year":2019,"publication_date":"2019-07-28","ids":{"openalex":"https://openalex.org/W2966461828","doi":"https://doi.org/10.24963/ijcai.2019/387","mag":"2966461828"},"language":"en","primary_location":{"id":"doi:10.24963/ijcai.2019/387","is_oa":true,"landing_page_url":"https://doi.org/10.24963/ijcai.2019/387","pdf_url":"https://www.ijcai.org/proceedings/2019/0387.pdf","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the Twenty-Eighth International Joint Conference on Artificial Intelligence","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://www.ijcai.org/proceedings/2019/0387.pdf","any_repository_has_fulltext":null},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5016571508","display_name":"Lin Lan","orcid":"https://orcid.org/0000-0001-7363-1143"},"institutions":[{"id":"https://openalex.org/I87445476","display_name":"Xi'an Jiaotong University","ror":"https://ror.org/017zhmm22","country_code":"CN","type":"education","lineage":["https://openalex.org/I87445476"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Lin Lan","raw_affiliation_strings":["MOE NSKEY Lab, Xi\u2019an Jiaotong University, China","MOE NSKEY Lab, Xi'an Jiaotong University, China"],"affiliations":[{"raw_affiliation_string":"MOE NSKEY Lab, Xi\u2019an Jiaotong University, China","institution_ids":["https://openalex.org/I87445476"]},{"raw_affiliation_string":"MOE NSKEY Lab, Xi'an Jiaotong University, China","institution_ids":["https://openalex.org/I87445476"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5103196797","display_name":"Zhenguo Li","orcid":"https://orcid.org/0000-0002-8492-3069"},"institutions":[{"id":"https://openalex.org/I4210159102","display_name":"Huawei Technologies (Sweden)","ror":"https://ror.org/0500fyd17","country_code":"SE","type":"company","lineage":["https://openalex.org/I2250955327","https://openalex.org/I4210159102"]}],"countries":["SE"],"is_corresponding":false,"raw_author_name":"Zhenguo Li","raw_affiliation_strings":["Huawei Noah's Ark Lab"],"affiliations":[{"raw_affiliation_string":"Huawei Noah's Ark Lab","institution_ids":["https://openalex.org/I4210159102"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5075845093","display_name":"Xiaohong Guan","orcid":"https://orcid.org/0000-0002-8826-0362"},"institutions":[{"id":"https://openalex.org/I87445476","display_name":"Xi'an Jiaotong University","ror":"https://ror.org/017zhmm22","country_code":"CN","type":"education","lineage":["https://openalex.org/I87445476"]},{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xiaohong Guan","raw_affiliation_strings":["Department of Automation and NLIST Lab, Tsinghua University, China","MOE NSKEY Lab, Xi\u2019an Jiaotong University, China","Shenzhen Research School, Xi\u2019an Jiaotong University, China","Shenzhen Research School, Xi'an Jiaotong University, China","MOE NSKEY Lab, Xi'an Jiaotong University, China"],"affiliations":[{"raw_affiliation_string":"Department of Automation and NLIST Lab, Tsinghua University, China","institution_ids":["https://openalex.org/I99065089"]},{"raw_affiliation_string":"MOE NSKEY Lab, Xi\u2019an Jiaotong University, China","institution_ids":["https://openalex.org/I87445476"]},{"raw_affiliation_string":"Shenzhen Research School, Xi\u2019an Jiaotong University, China","institution_ids":["https://openalex.org/I87445476"]},{"raw_affiliation_string":"Shenzhen Research School, Xi'an Jiaotong University, China","institution_ids":["https://openalex.org/I87445476"]},{"raw_affiliation_string":"MOE NSKEY Lab, Xi'an Jiaotong University, China","institution_ids":["https://openalex.org/I87445476"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5022240408","display_name":"Pinghui Wang","orcid":"https://orcid.org/0000-0001-5779-6108"},"institutions":[{"id":"https://openalex.org/I87445476","display_name":"Xi'an Jiaotong University","ror":"https://ror.org/017zhmm22","country_code":"CN","type":"education","lineage":["https://openalex.org/I87445476"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Pinghui Wang","raw_affiliation_strings":["MOE NSKEY Lab, Xi\u2019an Jiaotong University, China","Shenzhen Research School, Xi\u2019an Jiaotong University, China","MOE NSKEY Lab, Xi'an Jiaotong University, China","Shenzhen Research School, Xi'an Jiaotong University, China"],"affiliations":[{"raw_affiliation_string":"MOE NSKEY Lab, Xi\u2019an Jiaotong University, China","institution_ids":["https://openalex.org/I87445476"]},{"raw_affiliation_string":"Shenzhen Research School, Xi\u2019an Jiaotong University, China","institution_ids":["https://openalex.org/I87445476"]},{"raw_affiliation_string":"MOE NSKEY Lab, Xi'an Jiaotong University, China","institution_ids":["https://openalex.org/I87445476"]},{"raw_affiliation_string":"Shenzhen Research School, Xi'an Jiaotong University, China","institution_ids":["https://openalex.org/I87445476"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5016571508"],"corresponding_institution_ids":["https://openalex.org/I87445476"],"apc_list":null,"apc_paid":null,"fwci":3.3235,"has_fulltext":true,"cited_by_count":33,"citation_normalized_percentile":{"value":0.9393281,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":91,"max":99},"biblio":{"volume":null,"issue":null,"first_page":"2794","last_page":"2800"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9995999932289124,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9995999932289124,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11975","display_name":"Evolutionary Algorithms and Applications","score":0.9811000227928162,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11307","display_name":"Domain Adaptation and Few-Shot Learning","score":0.9577999711036682,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.852500319480896},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8307033181190491},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.7014818787574768},{"id":"https://openalex.org/keywords/generalization","display_name":"Generalization","score":0.6063677668571472},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5657931566238403},{"id":"https://openalex.org/keywords/set","display_name":"Set (abstract data type)","score":0.556977391242981},{"id":"https://openalex.org/keywords/embedding","display_name":"Embedding","score":0.5295950770378113},{"id":"https://openalex.org/keywords/inefficiency","display_name":"Inefficiency","score":0.5254982113838196},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.48674559593200684},{"id":"https://openalex.org/keywords/encoder","display_name":"Encoder","score":0.4555337429046631},{"id":"https://openalex.org/keywords/multi-task-learning","display_name":"Multi-task learning","score":0.41821444034576416},{"id":"https://openalex.org/keywords/human\u2013computer-interaction","display_name":"Human\u2013computer interaction","score":0.32470279932022095}],"concepts":[{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.852500319480896},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8307033181190491},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.7014818787574768},{"id":"https://openalex.org/C177148314","wikidata":"https://www.wikidata.org/wiki/Q170084","display_name":"Generalization","level":2,"score":0.6063677668571472},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5657931566238403},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.556977391242981},{"id":"https://openalex.org/C41608201","wikidata":"https://www.wikidata.org/wiki/Q980509","display_name":"Embedding","level":2,"score":0.5295950770378113},{"id":"https://openalex.org/C2778869765","wikidata":"https://www.wikidata.org/wiki/Q6028363","display_name":"Inefficiency","level":2,"score":0.5254982113838196},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.48674559593200684},{"id":"https://openalex.org/C118505674","wikidata":"https://www.wikidata.org/wiki/Q42586063","display_name":"Encoder","level":2,"score":0.4555337429046631},{"id":"https://openalex.org/C28006648","wikidata":"https://www.wikidata.org/wiki/Q6934509","display_name":"Multi-task learning","level":3,"score":0.41821444034576416},{"id":"https://openalex.org/C107457646","wikidata":"https://www.wikidata.org/wiki/Q207434","display_name":"Human\u2013computer interaction","level":1,"score":0.32470279932022095},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0},{"id":"https://openalex.org/C175444787","wikidata":"https://www.wikidata.org/wiki/Q39072","display_name":"Microeconomics","level":1,"score":0.0},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.0},{"id":"https://openalex.org/C134306372","wikidata":"https://www.wikidata.org/wiki/Q7754","display_name":"Mathematical analysis","level":1,"score":0.0},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0},{"id":"https://openalex.org/C187736073","wikidata":"https://www.wikidata.org/wiki/Q2920921","display_name":"Management","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.24963/ijcai.2019/387","is_oa":true,"landing_page_url":"https://doi.org/10.24963/ijcai.2019/387","pdf_url":"https://www.ijcai.org/proceedings/2019/0387.pdf","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the Twenty-Eighth International Joint Conference on Artificial Intelligence","raw_type":"proceedings-article"}],"best_oa_location":{"id":"doi:10.24963/ijcai.2019/387","is_oa":true,"landing_page_url":"https://doi.org/10.24963/ijcai.2019/387","pdf_url":"https://www.ijcai.org/proceedings/2019/0387.pdf","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the Twenty-Eighth International Joint Conference on Artificial Intelligence","raw_type":"proceedings-article"},"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G1067324075","display_name":null,"funder_award_id":"U1736205","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G1121271761","display_name":null,"funder_award_id":"Program","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G1830849682","display_name":null,"funder_award_id":"30500","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G2087396116","display_name":null,"funder_award_id":"China","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G2453883782","display_name":null,"funder_award_id":"61603290","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G2981938667","display_name":null,"funder_award_id":"Shenzhen","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G3317480652","display_name":null,"funder_award_id":"Science","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G3448220895","display_name":null,"funder_award_id":"161008","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G3757194791","display_name":null,"funder_award_id":"JCYJ20","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G391238517","display_name":null,"funder_award_id":", and","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G5641397543","display_name":null,"funder_award_id":"201708","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G5994120800","display_name":null,"funder_award_id":"Natural","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G6989792634","display_name":null,"funder_award_id":"U1736205, 61603290","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"},{"id":"https://openalex.org/F4320336567","display_name":"Natural Science Basic Research Program of Shaanxi Province","ror":null}],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W2966461828.pdf","grobid_xml":"https://content.openalex.org/works/W2966461828.grobid-xml"},"referenced_works_count":35,"referenced_works":["https://openalex.org/W41554520","https://openalex.org/W1959608418","https://openalex.org/W2119717200","https://openalex.org/W2155007355","https://openalex.org/W2158782408","https://openalex.org/W2257979135","https://openalex.org/W2578206533","https://openalex.org/W2601450892","https://openalex.org/W2604763608","https://openalex.org/W2606433045","https://openalex.org/W2624871570","https://openalex.org/W2734377693","https://openalex.org/W2736601468","https://openalex.org/W2742093937","https://openalex.org/W2785397462","https://openalex.org/W2787387965","https://openalex.org/W2788904251","https://openalex.org/W2792305967","https://openalex.org/W2794757725","https://openalex.org/W2803767077","https://openalex.org/W2809290718","https://openalex.org/W2809469215","https://openalex.org/W2890260778","https://openalex.org/W2963161674","https://openalex.org/W2963391602","https://openalex.org/W2963864421","https://openalex.org/W2963887494","https://openalex.org/W4214717370","https://openalex.org/W4298857915","https://openalex.org/W4298857966","https://openalex.org/W6752718806","https://openalex.org/W6803771590","https://openalex.org/W6834709193","https://openalex.org/W6863252125","https://openalex.org/W6863318804"],"related_works":["https://openalex.org/W2264067234","https://openalex.org/W3124243301","https://openalex.org/W1571502335","https://openalex.org/W1589409554","https://openalex.org/W2759038785","https://openalex.org/W2172232600","https://openalex.org/W3123876860","https://openalex.org/W3124172198","https://openalex.org/W2046181650","https://openalex.org/W2142633247"],"abstract_inverted_index":{"Despite":[0],"significant":[1],"progress,":[2],"deep":[3],"reinforcement":[4],"learning":[5,168],"(RL)":[6],"suffers":[7],"from":[8,21],"data-inefficiency":[9],"and":[10,61,66,80,100,151,173,176],"limited":[11],"generalization.":[12],"Recent":[13],"efforts":[14],"apply":[15],"meta-learning":[16],"to":[17,76,81,83,91,103,122,179,181,187],"learn":[18,77,142],"a":[19,22,29,51,110,125,133,143],"meta-learner":[20,121],"set":[23],"of":[24],"RL":[25],"tasks":[26,44,79,150,161,175],"such":[27],"that":[28,163],"novel":[30,84,174],"but":[31],"related":[32],"task":[33,111,126,134,154],"could":[34],"be":[35],"solved":[36],"quickly.":[37],"Though":[38],"specific":[39,65,107],"in":[40,45],"some":[41],"ways,":[42],"different":[43,70],"meta-RL":[46,56],"are":[47],"generally":[48],"similar":[49],"at":[50],"high":[52],"level.":[53],"However,":[54],"most":[55],"methods":[57],"do":[58],"not":[59],"explicitly":[60],"adequately":[62],"model":[63],"the":[64,93,97,106,113],"shared":[67,94,147],"information":[68,95,108],"among":[69],"tasks,":[71],"which":[72,131,145],"limits":[73],"their":[74],"ability":[75],"training":[78,172],"generalize":[82],"tasks.":[85],"In":[86],"this":[87],"paper,":[88],"we":[89,117,141],"propose":[90],"capture":[92],"on":[96,112,137,153,158,170],"one":[98],"hand":[99],"meta-learn":[101],"how":[102],"quickly":[104,123],"abstract":[105],"about":[109],"other":[114],"hand.":[115],"Methodologically,":[116],"train":[118],"an":[119],"SGD":[120],"optimize":[124],"encoder":[127],"for":[128],"each":[129],"task,":[130],"generates":[132],"embedding":[135],"based":[136],"past":[138],"experience.":[139],"Meanwhile,":[140],"policy":[144],"is":[146],"across":[148],"all":[149],"conditioned":[152],"embeddings.":[155],"Empirical":[156],"results":[157],"four":[159],"simulated":[160],"demonstrate":[162],"our":[164],"method":[165],"has":[166],"better":[167],"capacity":[169],"both":[171],"attains":[177],"up":[178],"3":[180],"4":[182],"times":[183],"higher":[184],"returns":[185],"compared":[186],"baselines.":[188]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":1},{"year":2024,"cited_by_count":2},{"year":2023,"cited_by_count":6},{"year":2022,"cited_by_count":5},{"year":2021,"cited_by_count":8},{"year":2020,"cited_by_count":7},{"year":2019,"cited_by_count":3}],"updated_date":"2026-03-18T14:38:29.013473","created_date":"2025-10-10T00:00:00"}
