{"id":"https://openalex.org/W4312713068","doi":"https://doi.org/10.1109/ijcnn55064.2022.9892381","title":"MaxEnt Dreamer: Maximum Entropy Reinforcement Learning with World Model","display_name":"MaxEnt Dreamer: Maximum Entropy Reinforcement Learning with World Model","publication_year":2022,"publication_date":"2022-07-18","ids":{"openalex":"https://openalex.org/W4312713068","doi":"https://doi.org/10.1109/ijcnn55064.2022.9892381"},"language":"en","primary_location":{"id":"doi:10.1109/ijcnn55064.2022.9892381","is_oa":false,"landing_page_url":"https://doi.org/10.1109/ijcnn55064.2022.9892381","pdf_url":null,"source":{"id":"https://openalex.org/S4363607707","display_name":"2022 International Joint Conference on Neural Networks (IJCNN)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2022 International Joint Conference on Neural Networks (IJCNN)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5101372601","display_name":"Hongying Ma","orcid":null},"institutions":[{"id":"https://openalex.org/I183067930","display_name":"Shanghai Jiao Tong University","ror":"https://ror.org/0220qvk04","country_code":"CN","type":"education","lineage":["https://openalex.org/I183067930"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Hongying Ma","raw_affiliation_strings":["Shanghai Jiao Tong University,Shanghai,China,200240"],"affiliations":[{"raw_affiliation_string":"Shanghai Jiao Tong University,Shanghai,China,200240","institution_ids":["https://openalex.org/I183067930"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5012509620","display_name":"Wuyang Xue","orcid":"https://orcid.org/0000-0002-7284-0629"},"institutions":[{"id":"https://openalex.org/I183067930","display_name":"Shanghai Jiao Tong University","ror":"https://ror.org/0220qvk04","country_code":"CN","type":"education","lineage":["https://openalex.org/I183067930"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Wuyang Xue","raw_affiliation_strings":["Shanghai Jiao Tong University,Shanghai,China,200240"],"affiliations":[{"raw_affiliation_string":"Shanghai Jiao Tong University,Shanghai,China,200240","institution_ids":["https://openalex.org/I183067930"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5048965499","display_name":"Rendong Ying","orcid":"https://orcid.org/0000-0001-6670-149X"},"institutions":[{"id":"https://openalex.org/I183067930","display_name":"Shanghai Jiao Tong University","ror":"https://ror.org/0220qvk04","country_code":"CN","type":"education","lineage":["https://openalex.org/I183067930"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Rendong Ying","raw_affiliation_strings":["Shanghai Jiao Tong University,Shanghai,China,200240"],"affiliations":[{"raw_affiliation_string":"Shanghai Jiao Tong University,Shanghai,China,200240","institution_ids":["https://openalex.org/I183067930"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5032986088","display_name":"Peilin Liu","orcid":"https://orcid.org/0000-0002-5321-2336"},"institutions":[{"id":"https://openalex.org/I183067930","display_name":"Shanghai Jiao Tong University","ror":"https://ror.org/0220qvk04","country_code":"CN","type":"education","lineage":["https://openalex.org/I183067930"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"PeiLin Liu","raw_affiliation_strings":["Shanghai Jiao Tong University,Shanghai,China,200240"],"affiliations":[{"raw_affiliation_string":"Shanghai Jiao Tong University,Shanghai,China,200240","institution_ids":["https://openalex.org/I183067930"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5101372601"],"corresponding_institution_ids":["https://openalex.org/I183067930"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.15773313,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":"8","issue":null,"first_page":"1","last_page":"9"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12794","display_name":"Adaptive Dynamic Programming Control","score":0.9976000189781189,"subfield":{"id":"https://openalex.org/subfields/1703","display_name":"Computational Theory and Mathematics"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10603","display_name":"Smart Grid Energy Management","score":0.9883999824523926,"subfield":{"id":"https://openalex.org/subfields/2208","display_name":"Electrical and Electronic Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.9473047852516174},{"id":"https://openalex.org/keywords/suite","display_name":"Suite","score":0.708885908126831},{"id":"https://openalex.org/keywords/principle-of-maximum-entropy","display_name":"Principle of maximum entropy","score":0.6898921132087708},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.688093364238739},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.6032376885414124},{"id":"https://openalex.org/keywords/entropy","display_name":"Entropy (arrow of time)","score":0.5787044167518616},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.5241253972053528},{"id":"https://openalex.org/keywords/temporal-difference-learning","display_name":"Temporal difference learning","score":0.45574474334716797},{"id":"https://openalex.org/keywords/reinforcement","display_name":"Reinforcement","score":0.4190214276313782},{"id":"https://openalex.org/keywords/mathematical-optimization","display_name":"Mathematical optimization","score":0.35158324241638184},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.19097569584846497},{"id":"https://openalex.org/keywords/engineering","display_name":"Engineering","score":0.11815375089645386}],"concepts":[{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.9473047852516174},{"id":"https://openalex.org/C79581498","wikidata":"https://www.wikidata.org/wiki/Q1367530","display_name":"Suite","level":2,"score":0.708885908126831},{"id":"https://openalex.org/C9679016","wikidata":"https://www.wikidata.org/wiki/Q1417473","display_name":"Principle of maximum entropy","level":2,"score":0.6898921132087708},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.688093364238739},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6032376885414124},{"id":"https://openalex.org/C106301342","wikidata":"https://www.wikidata.org/wiki/Q4117933","display_name":"Entropy (arrow of time)","level":2,"score":0.5787044167518616},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.5241253972053528},{"id":"https://openalex.org/C196340769","wikidata":"https://www.wikidata.org/wiki/Q7698910","display_name":"Temporal difference learning","level":3,"score":0.45574474334716797},{"id":"https://openalex.org/C67203356","wikidata":"https://www.wikidata.org/wiki/Q1321905","display_name":"Reinforcement","level":2,"score":0.4190214276313782},{"id":"https://openalex.org/C126255220","wikidata":"https://www.wikidata.org/wiki/Q141495","display_name":"Mathematical optimization","level":1,"score":0.35158324241638184},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.19097569584846497},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.11815375089645386},{"id":"https://openalex.org/C95457728","wikidata":"https://www.wikidata.org/wiki/Q309","display_name":"History","level":0,"score":0.0},{"id":"https://openalex.org/C66938386","wikidata":"https://www.wikidata.org/wiki/Q633538","display_name":"Structural engineering","level":1,"score":0.0},{"id":"https://openalex.org/C166957645","wikidata":"https://www.wikidata.org/wiki/Q23498","display_name":"Archaeology","level":1,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/ijcnn55064.2022.9892381","is_oa":false,"landing_page_url":"https://doi.org/10.1109/ijcnn55064.2022.9892381","pdf_url":null,"source":{"id":"https://openalex.org/S4363607707","display_name":"2022 International Joint Conference on Neural Networks (IJCNN)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2022 International Joint Conference on Neural Networks (IJCNN)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":74,"referenced_works":["https://openalex.org/W32403112","https://openalex.org/W2098774185","https://openalex.org/W2140135625","https://openalex.org/W2145339207","https://openalex.org/W2158782408","https://openalex.org/W2201581102","https://openalex.org/W2623491082","https://openalex.org/W2736601468","https://openalex.org/W2742169147","https://openalex.org/W2746553466","https://openalex.org/W2781585732","https://openalex.org/W2781726626","https://openalex.org/W2785738552","https://openalex.org/W2787666871","https://openalex.org/W2798494119","https://openalex.org/W2798705390","https://openalex.org/W2859967432","https://openalex.org/W2888492136","https://openalex.org/W2946723315","https://openalex.org/W2949561945","https://openalex.org/W2951799221","https://openalex.org/W2963403593","https://openalex.org/W2963523627","https://openalex.org/W2963722050","https://openalex.org/W2964006217","https://openalex.org/W2964043796","https://openalex.org/W2975395546","https://openalex.org/W2977843878","https://openalex.org/W2989847975","https://openalex.org/W2995298643","https://openalex.org/W2996449210","https://openalex.org/W3118210634","https://openalex.org/W3123348991","https://openalex.org/W3130475119","https://openalex.org/W3169291081","https://openalex.org/W3210788351","https://openalex.org/W4214717370","https://openalex.org/W4239125046","https://openalex.org/W4287779179","https://openalex.org/W4288319859","https://openalex.org/W4289294484","https://openalex.org/W4298324247","https://openalex.org/W4298876402","https://openalex.org/W4300799055","https://openalex.org/W6639949747","https://openalex.org/W6674884181","https://openalex.org/W6680657880","https://openalex.org/W6683300800","https://openalex.org/W6685444567","https://openalex.org/W6687681856","https://openalex.org/W6692846177","https://openalex.org/W6734517396","https://openalex.org/W6739193204","https://openalex.org/W6740801417","https://openalex.org/W6741002519","https://openalex.org/W6747387971","https://openalex.org/W6747473740","https://openalex.org/W6747924173","https://openalex.org/W6749821205","https://openalex.org/W6753183898","https://openalex.org/W6756256016","https://openalex.org/W6761450092","https://openalex.org/W6762640273","https://openalex.org/W6764053384","https://openalex.org/W6768017362","https://openalex.org/W6771217966","https://openalex.org/W6771656438","https://openalex.org/W6778000925","https://openalex.org/W6789293996","https://openalex.org/W6790970192","https://openalex.org/W6796289742","https://openalex.org/W6796426860","https://openalex.org/W6803185011","https://openalex.org/W6820391488"],"related_works":["https://openalex.org/W4400868993","https://openalex.org/W2145363145","https://openalex.org/W2341346307","https://openalex.org/W2154399718","https://openalex.org/W4321463377","https://openalex.org/W4312713068","https://openalex.org/W4384574988","https://openalex.org/W2768629321","https://openalex.org/W1914583973","https://openalex.org/W2130711276"],"abstract_inverted_index":{"Model-based":[0],"reinforcement":[1,37,117,129],"learning":[2,38,130],"algorithms":[3,29,39],"can":[4],"alleviate":[5],"the":[6,19,26,45,52,70,85,97,104,107,112,123,134],"low":[7],"sample":[8],"efficiency":[9],"problem":[10],"compared":[11],"with":[12,44],"modelfree":[13],"methods":[14],"for":[15],"control":[16,146],"tasks.":[17,148],"However,":[18],"learned":[20,46,53,71,98],"policy's":[21],"performance":[22,136],"often":[23],"lags":[24],"behind":[25],"best":[27],"model-free":[28],"since":[30],"its":[31],"weak":[32,66],"exploration":[33,68],"ability.":[34],"Existing":[35],"model-based":[36,128],"learn":[40,84],"policy":[41,54,67,88,109],"by":[42,101,133],"interacting":[43],"world":[47,61,72,92,99],"model":[48,62,73,100],"and":[49,106,144],"then":[50],"use":[51],"to":[55,65,83,127],"guide":[56],"a":[57,75,79,91],"new":[58],"round":[59],"of":[60,114,121,137],"learning.":[63,118],"Due":[64],"ability,":[69],"has":[74],"large":[76],"bias.":[77],"As":[78],"result,":[80],"it":[81],"fails":[82],"globally":[86],"optimal":[87],"on":[89,140],"such":[90],"model.":[93],"This":[94],"paper":[95],"improves":[96],"maximizing":[102],"both":[103],"reward":[105],"corresponding":[108],"entropy":[110,116,125],"in":[111],"framework":[113],"maximum":[115,124],"The":[119],"effectiveness":[120],"applying":[122],"approach":[126],"is":[131],"supported":[132],"better":[135],"our":[138],"algorithm":[139],"several":[141],"complex":[142],"mujoco":[143],"deepmind":[145],"suite":[147]},"counts_by_year":[],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
