{"id":"https://openalex.org/W4385482775","doi":"https://doi.org/10.1109/ijcnn54540.2023.10191425","title":"Variational Skill Embeddings for Meta Reinforcement Learning","display_name":"Variational Skill Embeddings for Meta Reinforcement Learning","publication_year":2023,"publication_date":"2023-06-18","ids":{"openalex":"https://openalex.org/W4385482775","doi":"https://doi.org/10.1109/ijcnn54540.2023.10191425"},"language":"en","primary_location":{"id":"doi:10.1109/ijcnn54540.2023.10191425","is_oa":false,"landing_page_url":"https://doi.org/10.1109/ijcnn54540.2023.10191425","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2023 International Joint Conference on Neural Networks (IJCNN)","raw_type":"proceedings-article"},"type":"conference-paper","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5061908942","display_name":"Jen\u2010Tzung Chien","orcid":"https://orcid.org/0000-0003-3466-8941"},"institutions":[{"id":"https://openalex.org/I148366613","display_name":"National Yang Ming Chiao Tung University","ror":"https://ror.org/00se2k293","country_code":"TW","type":"education","lineage":["https://openalex.org/I148366613"]}],"countries":["TW"],"is_corresponding":false,"raw_author_name":"Jen-Tzung Chien","raw_affiliation_strings":["Institute of Electrical and Computer Engineering, National Yang Ming Chiao Tung University,Taiwan","Institute of Electrical and Computer Engineering, National Yang Ming Chiao Tung University, Taiwan"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Institute of Electrical and Computer Engineering, National Yang Ming Chiao Tung University,Taiwan","institution_ids":["https://openalex.org/I148366613"]},{"raw_affiliation_string":"Institute of Electrical and Computer Engineering, National Yang Ming Chiao Tung University, Taiwan","institution_ids":["https://openalex.org/I148366613"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5016035320","display_name":"Weiwei Lai","orcid":null},"institutions":[{"id":"https://openalex.org/I148366613","display_name":"National Yang Ming Chiao Tung University","ror":"https://ror.org/00se2k293","country_code":"TW","type":"education","lineage":["https://openalex.org/I148366613"]}],"countries":["TW"],"is_corresponding":false,"raw_author_name":"Weiwei Lai","raw_affiliation_strings":["Institute of Electrical and Computer Engineering, National Yang Ming Chiao Tung University,Taiwan","Institute of Electrical and Computer Engineering, National Yang Ming Chiao Tung University, Taiwan"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Institute of Electrical and Computer Engineering, National Yang Ming Chiao Tung University,Taiwan","institution_ids":["https://openalex.org/I148366613"]},{"raw_affiliation_string":"Institute of Electrical and Computer Engineering, National Yang Ming Chiao Tung University, Taiwan","institution_ids":["https://openalex.org/I148366613"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":1,"corresponding_author_ids":[],"corresponding_institution_ids":["https://openalex.org/I148366613"],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":4,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"8"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9995999932289124,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9995999932289124,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12794","display_name":"Adaptive Dynamic Programming Control","score":0.9941999912261963,"subfield":{"id":"https://openalex.org/subfields/1703","display_name":"Computational Theory and Mathematics"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11206","display_name":"Model Reduction and Neural Networks","score":0.9571999907493591,"subfield":{"id":"https://openalex.org/subfields/3109","display_name":"Statistical and Nonlinear Physics"},"field":{"id":"https://openalex.org/fields/31","display_name":"Physics and Astronomy"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.8595768809318542},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7720493078231812},{"id":"https://openalex.org/keywords/autoencoder","display_name":"Autoencoder","score":0.7199959754943848},{"id":"https://openalex.org/keywords/meta-learning","display_name":"Meta learning (computer science)","score":0.6914582848548889},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.6059495806694031},{"id":"https://openalex.org/keywords/regularization","display_name":"Regularization (linguistics)","score":0.5478439331054688},{"id":"https://openalex.org/keywords/context","display_name":"Context (archaeology)","score":0.5289419293403625},{"id":"https://openalex.org/keywords/benchmark","display_name":"Benchmark (surveying)","score":0.5208319425582886},{"id":"https://openalex.org/keywords/generalization","display_name":"Generalization","score":0.5196791291236877},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.4653158187866211},{"id":"https://openalex.org/keywords/artificial-neural-network","display_name":"Artificial neural network","score":0.35378462076187134},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.1588868796825409}],"concepts":[{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.8595768809318542},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7720493078231812},{"id":"https://openalex.org/C101738243","wikidata":"https://www.wikidata.org/wiki/Q786435","display_name":"Autoencoder","level":3,"score":0.7199959754943848},{"id":"https://openalex.org/C2781002164","wikidata":"https://www.wikidata.org/wiki/Q6822311","display_name":"Meta learning (computer science)","level":3,"score":0.6914582848548889},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6059495806694031},{"id":"https://openalex.org/C2776135515","wikidata":"https://www.wikidata.org/wiki/Q17143721","display_name":"Regularization (linguistics)","level":2,"score":0.5478439331054688},{"id":"https://openalex.org/C2779343474","wikidata":"https://www.wikidata.org/wiki/Q3109175","display_name":"Context (archaeology)","level":2,"score":0.5289419293403625},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.5208319425582886},{"id":"https://openalex.org/C177148314","wikidata":"https://www.wikidata.org/wiki/Q170084","display_name":"Generalization","level":2,"score":0.5196791291236877},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.4653158187866211},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.35378462076187134},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.1588868796825409},{"id":"https://openalex.org/C13280743","wikidata":"https://www.wikidata.org/wiki/Q131089","display_name":"Geodesy","level":1,"score":0.0},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0},{"id":"https://openalex.org/C151730666","wikidata":"https://www.wikidata.org/wiki/Q7205","display_name":"Paleontology","level":1,"score":0.0},{"id":"https://openalex.org/C134306372","wikidata":"https://www.wikidata.org/wiki/Q7754","display_name":"Mathematical analysis","level":1,"score":0.0},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.0},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0},{"id":"https://openalex.org/C187736073","wikidata":"https://www.wikidata.org/wiki/Q2920921","display_name":"Management","level":1,"score":0.0},{"id":"https://openalex.org/C205649164","wikidata":"https://www.wikidata.org/wiki/Q1071","display_name":"Geography","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/ijcnn54540.2023.10191425","is_oa":false,"landing_page_url":"https://doi.org/10.1109/ijcnn54540.2023.10191425","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2023 International Joint Conference on Neural Networks (IJCNN)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":63,"referenced_works":["https://openalex.org/W567721252","https://openalex.org/W1959608418","https://openalex.org/W2109910161","https://openalex.org/W2158782408","https://openalex.org/W2242047860","https://openalex.org/W2547875792","https://openalex.org/W2556477470","https://openalex.org/W2578206533","https://openalex.org/W2604763608","https://openalex.org/W2606433045","https://openalex.org/W2749679082","https://openalex.org/W2781726626","https://openalex.org/W2785342287","https://openalex.org/W2952526277","https://openalex.org/W2963161674","https://openalex.org/W2963438456","https://openalex.org/W2964227312","https://openalex.org/W2973112754","https://openalex.org/W2981344907","https://openalex.org/W2982316857","https://openalex.org/W2995726179","https://openalex.org/W3013821552","https://openalex.org/W3093635834","https://openalex.org/W3117383065","https://openalex.org/W3121879877","https://openalex.org/W3132647335","https://openalex.org/W4210660489","https://openalex.org/W4224944611","https://openalex.org/W4226453515","https://openalex.org/W4283365039","https://openalex.org/W4288289109","https://openalex.org/W4289388948","https://openalex.org/W4293469690","https://openalex.org/W4294646197","https://openalex.org/W4312554247","https://openalex.org/W4313887415","https://openalex.org/W4375868933","https://openalex.org/W4404752311","https://openalex.org/W6616173779","https://openalex.org/W6640963894","https://openalex.org/W6729448088","https://openalex.org/W6729906282","https://openalex.org/W6730153900","https://openalex.org/W6731982132","https://openalex.org/W6736057607","https://openalex.org/W6736368053","https://openalex.org/W6744935223","https://openalex.org/W6747473740","https://openalex.org/W6748566876","https://openalex.org/W6748603076","https://openalex.org/W6750254146","https://openalex.org/W6755476724","https://openalex.org/W6760698134","https://openalex.org/W6764125455","https://openalex.org/W6764724164","https://openalex.org/W6768602481","https://openalex.org/W6769596995","https://openalex.org/W6775647304","https://openalex.org/W6788646031","https://openalex.org/W6790552527","https://openalex.org/W6810488170","https://openalex.org/W6810975397","https://openalex.org/W6874449412"],"related_works":["https://openalex.org/W2159052453","https://openalex.org/W3013693939","https://openalex.org/W2566616303","https://openalex.org/W3131327266","https://openalex.org/W3130669838","https://openalex.org/W2785397462","https://openalex.org/W4294873804","https://openalex.org/W4383109125","https://openalex.org/W4283332751","https://openalex.org/W2891227010"],"abstract_inverted_index":{"Meta":[0],"reinforcement":[1],"learning":[2,64],"(meta-RL)":[3],"aims":[4],"to":[5,16,36,40,80,89],"learn":[6,41],"useful":[7],"prior":[8],"knowledge":[9],"across":[10,146],"tasks":[11,20,50,147,160],"which":[12,51],"can":[13,77],"be":[14,78],"generalized":[15],"unseen":[17],"but":[18,83],"similar":[19],"with":[21,96],"only":[22],"a":[23,90,102,114,124,128,156],"small":[24],"number":[25,157],"of":[26,44,67,109,123,150,158,164],"adaptation":[27],"steps.":[28],"Traditionally,":[29],"the":[30,38,42,57,61,65,68,84,107,118,137,142,162],"gradient-based":[31],"metal":[32],"RL":[33],"was":[34],"proposed":[35,134],"use":[37],"gradients":[39],"parameters":[43],"an":[45],"adaptive":[46],"policy":[47,76,110],"from":[48],"different":[49],"likely":[52],"lacked":[53],"sample":[54],"efficiency.":[55],"Recently,":[56],"context-based":[58,115,151],"meta-RL":[59,105,116],"improved":[60],"efficiency":[62],"by":[63,140],"embeddings":[66,167],"trajectories":[69],"based":[70],"on":[71,155],"context":[72,92],"representation.":[73],"The":[74,133,153],"learned":[75],"adapted":[79],"new":[81],"tasks,":[82],"performance":[85],"is":[86,111,131],"bounded":[87],"due":[88],"simple":[91],"encoder.":[93],"To":[94],"deal":[95],"this":[97,99],"insufficiency,":[98],"paper":[100],"presents":[101],"novel":[103],"regularized":[104,169],"where":[106,117],"generalization":[108],"enhanced":[112],"through":[113],"conditional":[119],"variational":[120,165],"autoencoder":[121],"consisting":[122],"context-skill":[125],"encoder":[126],"and":[127],"soft-actor-critic":[129],"decoder":[130],"implemented.":[132],"method":[135],"pursues":[136],"model":[138],"regularization":[139],"discovering":[141],"shared":[143],"skill":[144,166],"patterns":[145],"in":[148],"implementation":[149],"meta-RL.":[152,170],"experiments":[154],"benchmark":[159],"show":[161],"merit":[163],"for":[168]},"counts_by_year":[{"year":2025,"cited_by_count":2},{"year":2024,"cited_by_count":1},{"year":2023,"cited_by_count":1}],"updated_date":"2026-07-14T23:27:15.235271","created_date":"2025-10-10T00:00:00"}