{"id":"https://openalex.org/W4399849504","doi":"https://doi.org/10.1109/lra.2024.3417114","title":"Dream to Adapt: Meta Reinforcement Learning by Latent Context Imagination and MDP Imagination","display_name":"Dream to Adapt: Meta Reinforcement Learning by Latent Context Imagination and MDP Imagination","publication_year":2024,"publication_date":"2024-06-20","ids":{"openalex":"https://openalex.org/W4399849504","doi":"https://doi.org/10.1109/lra.2024.3417114"},"language":"en","primary_location":{"id":"doi:10.1109/lra.2024.3417114","is_oa":false,"landing_page_url":"https://doi.org/10.1109/lra.2024.3417114","pdf_url":null,"source":{"id":"https://openalex.org/S4210169774","display_name":"IEEE Robotics and Automation Letters","issn_l":"2377-3766","issn":["2377-3766"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Robotics and Automation Letters","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5102827067","display_name":"Lu Wen","orcid":"https://orcid.org/0000-0002-8197-8195"},"institutions":[{"id":"https://openalex.org/I27837315","display_name":"University of Michigan","ror":"https://ror.org/00jmfr291","country_code":"US","type":"education","lineage":["https://openalex.org/I27837315"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Lu Wen","raw_affiliation_strings":["Mechanical Engineering, University of Michigan, Ann Arbor, MI, USA"],"raw_orcid":"https://orcid.org/0000-0002-8197-8195","affiliations":[{"raw_affiliation_string":"Mechanical Engineering, University of Michigan, Ann Arbor, MI, USA","institution_ids":["https://openalex.org/I27837315"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5020219990","display_name":"Eric Tseng","orcid":null},"institutions":[{"id":"https://openalex.org/I4210130704","display_name":"University of Michigan\u2013Dearborn","ror":"https://ror.org/035wtm547","country_code":"US","type":"education","lineage":["https://openalex.org/I4210130704"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Eric H. Tseng","raw_affiliation_strings":["Department of Electrical Engineering, University of Texas at Arlington, Dearborn, MI, USA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Department of Electrical Engineering, University of Texas at Arlington, Dearborn, MI, USA","institution_ids":["https://openalex.org/I4210130704"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5056009561","display_name":"Huei Peng","orcid":"https://orcid.org/0000-0002-7684-1696"},"institutions":[{"id":"https://openalex.org/I27837315","display_name":"University of Michigan","ror":"https://ror.org/00jmfr291","country_code":"US","type":"education","lineage":["https://openalex.org/I27837315"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Huei Peng","raw_affiliation_strings":["Mechanical Engineering, University of Michigan, Ann Arbor, MI, USA"],"raw_orcid":"https://orcid.org/0000-0002-7684-1696","affiliations":[{"raw_affiliation_string":"Mechanical Engineering, University of Michigan, Ann Arbor, MI, USA","institution_ids":["https://openalex.org/I27837315"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5045427668","display_name":"Songan Zhang","orcid":null},"institutions":[{"id":"https://openalex.org/I183067930","display_name":"Shanghai Jiao Tong University","ror":"https://ror.org/0220qvk04","country_code":"CN","type":"education","lineage":["https://openalex.org/I183067930"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Songan Zhang","raw_affiliation_strings":["Global Institute of Future Technology, Shanghai Jiao Tong University, Shanghai, China"],"raw_orcid":"https://orcid.org/0000-0002-3238-5406","affiliations":[{"raw_affiliation_string":"Global Institute of Future Technology, Shanghai Jiao Tong University, Shanghai, China","institution_ids":["https://openalex.org/I183067930"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5102827067"],"corresponding_institution_ids":["https://openalex.org/I27837315"],"apc_list":null,"apc_paid":null,"fwci":1.6557,"has_fulltext":false,"cited_by_count":5,"citation_normalized_percentile":{"value":0.85918349,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":94,"max":97},"biblio":{"volume":"9","issue":"11","first_page":"9701","last_page":"9708"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.22419999539852142,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.22419999539852142,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/dream","display_name":"Dream","score":0.7317308187484741},{"id":"https://openalex.org/keywords/context","display_name":"Context (archaeology)","score":0.583538293838501},{"id":"https://openalex.org/keywords/psychology","display_name":"Psychology","score":0.5734645128250122},{"id":"https://openalex.org/keywords/imagination","display_name":"Imagination","score":0.5420759916305542},{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.49507370591163635},{"id":"https://openalex.org/keywords/reinforcement","display_name":"Reinforcement","score":0.45211753249168396},{"id":"https://openalex.org/keywords/cognitive-psychology","display_name":"Cognitive psychology","score":0.3541313409805298},{"id":"https://openalex.org/keywords/social-psychology","display_name":"Social psychology","score":0.2956084609031677},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.20288947224617004},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.19819536805152893},{"id":"https://openalex.org/keywords/psychotherapist","display_name":"Psychotherapist","score":0.1701679527759552},{"id":"https://openalex.org/keywords/history","display_name":"History","score":0.1046135425567627}],"concepts":[{"id":"https://openalex.org/C2781095916","wikidata":"https://www.wikidata.org/wiki/Q36348","display_name":"Dream","level":2,"score":0.7317308187484741},{"id":"https://openalex.org/C2779343474","wikidata":"https://www.wikidata.org/wiki/Q3109175","display_name":"Context (archaeology)","level":2,"score":0.583538293838501},{"id":"https://openalex.org/C15744967","wikidata":"https://www.wikidata.org/wiki/Q9418","display_name":"Psychology","level":0,"score":0.5734645128250122},{"id":"https://openalex.org/C2868805","wikidata":"https://www.wikidata.org/wiki/Q34516","display_name":"Imagination","level":2,"score":0.5420759916305542},{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.49507370591163635},{"id":"https://openalex.org/C67203356","wikidata":"https://www.wikidata.org/wiki/Q1321905","display_name":"Reinforcement","level":2,"score":0.45211753249168396},{"id":"https://openalex.org/C180747234","wikidata":"https://www.wikidata.org/wiki/Q23373","display_name":"Cognitive psychology","level":1,"score":0.3541313409805298},{"id":"https://openalex.org/C77805123","wikidata":"https://www.wikidata.org/wiki/Q161272","display_name":"Social psychology","level":1,"score":0.2956084609031677},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.20288947224617004},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.19819536805152893},{"id":"https://openalex.org/C542102704","wikidata":"https://www.wikidata.org/wiki/Q183257","display_name":"Psychotherapist","level":1,"score":0.1701679527759552},{"id":"https://openalex.org/C95457728","wikidata":"https://www.wikidata.org/wiki/Q309","display_name":"History","level":0,"score":0.1046135425567627},{"id":"https://openalex.org/C166957645","wikidata":"https://www.wikidata.org/wiki/Q23498","display_name":"Archaeology","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/lra.2024.3417114","is_oa":false,"landing_page_url":"https://doi.org/10.1109/lra.2024.3417114","pdf_url":null,"source":{"id":"https://openalex.org/S4210169774","display_name":"IEEE Robotics and Automation Letters","issn_l":"2377-3766","issn":["2377-3766"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Robotics and Automation Letters","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[{"score":0.5400000214576721,"display_name":"Reduced inequalities","id":"https://metadata.un.org/sdg/10"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":47,"referenced_works":["https://openalex.org/W1965455100","https://openalex.org/W2085842743","https://openalex.org/W2157331557","https://openalex.org/W2578206533","https://openalex.org/W2753738274","https://openalex.org/W2796704765","https://openalex.org/W2925893094","https://openalex.org/W2963108767","https://openalex.org/W2979869797","https://openalex.org/W2995298643","https://openalex.org/W3035216917","https://openalex.org/W3122690883","https://openalex.org/W3131719819","https://openalex.org/W3188527995","https://openalex.org/W3193667944","https://openalex.org/W3197711175","https://openalex.org/W3209381383","https://openalex.org/W3215905638","https://openalex.org/W4294646197","https://openalex.org/W4300971732","https://openalex.org/W4312299945","https://openalex.org/W6729433768","https://openalex.org/W6731982132","https://openalex.org/W6736057607","https://openalex.org/W6744627333","https://openalex.org/W6747473740","https://openalex.org/W6748391871","https://openalex.org/W6750254146","https://openalex.org/W6750852989","https://openalex.org/W6752378368","https://openalex.org/W6760698134","https://openalex.org/W6768791272","https://openalex.org/W6769596995","https://openalex.org/W6771217966","https://openalex.org/W6779307311","https://openalex.org/W6779441025","https://openalex.org/W6779605632","https://openalex.org/W6783988234","https://openalex.org/W6784057640","https://openalex.org/W6790309923","https://openalex.org/W6790599198","https://openalex.org/W6791666512","https://openalex.org/W6795758547","https://openalex.org/W6799214972","https://openalex.org/W6800347387","https://openalex.org/W6801303347","https://openalex.org/W6804870611"],"related_works":["https://openalex.org/W1762480892","https://openalex.org/W597599663","https://openalex.org/W3194824274","https://openalex.org/W4205387075","https://openalex.org/W1537165133","https://openalex.org/W2390579330","https://openalex.org/W1578892932","https://openalex.org/W2387132837","https://openalex.org/W2316163406","https://openalex.org/W2369963050"],"abstract_inverted_index":{"Meta":[0,26,60],"reinforcement":[1],"learning":[2],"(Meta":[3],"RL)":[4],"has":[5],"been":[6],"amply":[7],"explored":[8],"to":[9,33,105],"quickly":[10],"learn":[11],"an":[12],"unseen":[13],"task":[14,40],"by":[15,71,80],"transferring":[16],"previously":[17],"learned":[18,84],"knowledge":[19,102],"from":[20],"similar":[21],"tasks.":[22],"However,":[23],"most":[24],"state-of-the-art":[25],"RL":[27,61],"algorithms":[28],"require":[29],"the":[30,39,83,96],"meta-training":[31],"tasks":[32,68],"have":[34],"a":[35,43,58],"dense":[36],"coverage":[37],"of":[38,46,50],"distribution":[41],"and":[42,69,74,123],"great":[44],"amount":[45],"data":[47,70,121],"for":[48],"each":[49],"them.":[51],"In":[52],"this":[53],"letter,":[54],"we":[55],"propose":[56],"MetaDreamer,":[57],"context-based":[59],"algorithm":[62],"that":[63,115],"requires":[64],"less":[65],"real":[66],"training":[67],"doing":[72],"meta-imagination":[73,79],"MDP-imagination":[75,94],"(Markov-Decision-Process).":[76],"We":[77],"perform":[78],"interpolating":[81],"on":[82],"latent":[85],"context":[86],"space":[87],"with":[88,111],"disentangled":[89],"properties,":[90],"as":[91,93],"well":[92],"through":[95],"generative":[97],"world":[98],"model":[99],"where":[100],"physical":[101],"is":[103],"added":[104],"plain":[106],"VAE":[107],"networks.":[108],"Our":[109],"experiments":[110],"various":[112],"benchmarks":[113],"show":[114],"MetaDreamer":[116],"outperforms":[117],"existing":[118],"approaches":[119],"in":[120],"efficiency":[122],"interpolated":[124],"generalization.":[125]},"counts_by_year":[{"year":2025,"cited_by_count":3},{"year":2024,"cited_by_count":2}],"updated_date":"2025-12-26T23:08:49.675405","created_date":"2025-10-10T00:00:00"}
