{"id":"https://openalex.org/W4410614012","doi":"https://doi.org/10.1109/lra.2025.3572822","title":"Maximum Next-State Entropy for Efficient Reinforcement Learning","display_name":"Maximum Next-State Entropy for Efficient Reinforcement Learning","publication_year":2025,"publication_date":"2025-05-22","ids":{"openalex":"https://openalex.org/W4410614012","doi":"https://doi.org/10.1109/lra.2025.3572822"},"language":"en","primary_location":{"id":"doi:10.1109/lra.2025.3572822","is_oa":false,"landing_page_url":"https://doi.org/10.1109/lra.2025.3572822","pdf_url":null,"source":{"id":"https://openalex.org/S4210169774","display_name":"IEEE Robotics and Automation Letters","issn_l":"2377-3766","issn":["2377-3766"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Robotics and Automation Letters","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5063835591","display_name":"Dianyu Zhong","orcid":"https://orcid.org/0000-0002-3262-4905"},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Dianyu Zhong","raw_affiliation_strings":["Center for Intelligent and Networked Systems (CFINS), Department of Automation and BNRist, Tsinghua University, Beijing, China"],"raw_orcid":"https://orcid.org/0000-0002-3262-4905","affiliations":[{"raw_affiliation_string":"Center for Intelligent and Networked Systems (CFINS), Department of Automation and BNRist, Tsinghua University, Beijing, China","institution_ids":["https://openalex.org/I99065089"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5046252455","display_name":"Yiqin Yang","orcid":"https://orcid.org/0000-0002-8748-1964"},"institutions":[{"id":"https://openalex.org/I19820366","display_name":"Chinese Academy of Sciences","ror":"https://ror.org/034t30j35","country_code":"CN","type":"government","lineage":["https://openalex.org/I19820366"]},{"id":"https://openalex.org/I4210094879","display_name":"Shandong Institute of Automation","ror":"https://ror.org/00qdtba35","country_code":"CN","type":"facility","lineage":["https://openalex.org/I4210094879","https://openalex.org/I4210142748"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yiqin Yang","raw_affiliation_strings":["Institute of Automation, Chinese Academy of Sciences, Beijing, China"],"raw_orcid":"https://orcid.org/0000-0002-8748-1964","affiliations":[{"raw_affiliation_string":"Institute of Automation, Chinese Academy of Sciences, Beijing, China","institution_ids":["https://openalex.org/I4210094879","https://openalex.org/I19820366"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101494072","display_name":"Ziyou Zhang","orcid":"https://orcid.org/0000-0003-3839-7747"},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Ziyou Zhang","raw_affiliation_strings":["Center for Intelligent and Networked Systems (CFINS), Department of Automation and BNRist, Tsinghua University, Beijing, China"],"raw_orcid":"https://orcid.org/0000-0003-3839-7747","affiliations":[{"raw_affiliation_string":"Center for Intelligent and Networked Systems (CFINS), Department of Automation and BNRist, Tsinghua University, Beijing, China","institution_ids":["https://openalex.org/I99065089"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Yuhua Jiang","orcid":"https://orcid.org/0000-0003-3081-6038"},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yuhua Jiang","raw_affiliation_strings":["Center for Intelligent and Networked Systems (CFINS), Department of Automation and BNRist, Tsinghua University, Beijing, China"],"raw_orcid":"https://orcid.org/0000-0003-3081-6038","affiliations":[{"raw_affiliation_string":"Center for Intelligent and Networked Systems (CFINS), Department of Automation and BNRist, Tsinghua University, Beijing, China","institution_ids":["https://openalex.org/I99065089"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Bo Xu","orcid":"https://orcid.org/0000-0002-1111-1529"},"institutions":[{"id":"https://openalex.org/I19820366","display_name":"Chinese Academy of Sciences","ror":"https://ror.org/034t30j35","country_code":"CN","type":"government","lineage":["https://openalex.org/I19820366"]},{"id":"https://openalex.org/I4210094879","display_name":"Shandong Institute of Automation","ror":"https://ror.org/00qdtba35","country_code":"CN","type":"facility","lineage":["https://openalex.org/I4210094879","https://openalex.org/I4210142748"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Bo Xu","raw_affiliation_strings":["Institute of Automation, Chinese Academy of Sciences, Beijing, China"],"raw_orcid":"https://orcid.org/0000-0002-1111-1529","affiliations":[{"raw_affiliation_string":"Institute of Automation, Chinese Academy of Sciences, Beijing, China","institution_ids":["https://openalex.org/I4210094879","https://openalex.org/I19820366"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5014109600","display_name":"Qianchuan Zhao","orcid":"https://orcid.org/0000-0002-7952-5621"},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Qianchuan Zhao","raw_affiliation_strings":["Center for Intelligent and Networked Systems (CFINS), Department of Automation and BNRist, Tsinghua University, Beijing, China"],"raw_orcid":"https://orcid.org/0000-0002-7952-5621","affiliations":[{"raw_affiliation_string":"Center for Intelligent and Networked Systems (CFINS), Department of Automation and BNRist, Tsinghua University, Beijing, China","institution_ids":["https://openalex.org/I99065089"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5063835591"],"corresponding_institution_ids":["https://openalex.org/I99065089"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.04991889,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":"10","issue":"7","first_page":"6896","last_page":"6903"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9829999804496765,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9829999804496765,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10603","display_name":"Smart Grid Energy Management","score":0.9289000034332275,"subfield":{"id":"https://openalex.org/subfields/2208","display_name":"Electrical and Electronic Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10249","display_name":"Distributed Control Multi-Agent Systems","score":0.9157999753952026,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.800513744354248},{"id":"https://openalex.org/keywords/principle-of-maximum-entropy","display_name":"Principle of maximum entropy","score":0.5300830602645874},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.4929201602935791},{"id":"https://openalex.org/keywords/reinforcement","display_name":"Reinforcement","score":0.46193209290504456},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.4530654549598694},{"id":"https://openalex.org/keywords/entropy","display_name":"Entropy (arrow of time)","score":0.42015111446380615},{"id":"https://openalex.org/keywords/state","display_name":"State (computer science)","score":0.4193994700908661},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.35870859026908875},{"id":"https://openalex.org/keywords/engineering","display_name":"Engineering","score":0.1930181086063385},{"id":"https://openalex.org/keywords/thermodynamics","display_name":"Thermodynamics","score":0.10669565200805664},{"id":"https://openalex.org/keywords/physics","display_name":"Physics","score":0.09402665495872498},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.07956159114837646},{"id":"https://openalex.org/keywords/structural-engineering","display_name":"Structural engineering","score":0.050083667039871216}],"concepts":[{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.800513744354248},{"id":"https://openalex.org/C9679016","wikidata":"https://www.wikidata.org/wiki/Q1417473","display_name":"Principle of maximum entropy","level":2,"score":0.5300830602645874},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.4929201602935791},{"id":"https://openalex.org/C67203356","wikidata":"https://www.wikidata.org/wiki/Q1321905","display_name":"Reinforcement","level":2,"score":0.46193209290504456},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4530654549598694},{"id":"https://openalex.org/C106301342","wikidata":"https://www.wikidata.org/wiki/Q4117933","display_name":"Entropy (arrow of time)","level":2,"score":0.42015111446380615},{"id":"https://openalex.org/C48103436","wikidata":"https://www.wikidata.org/wiki/Q599031","display_name":"State (computer science)","level":2,"score":0.4193994700908661},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.35870859026908875},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.1930181086063385},{"id":"https://openalex.org/C97355855","wikidata":"https://www.wikidata.org/wiki/Q11473","display_name":"Thermodynamics","level":1,"score":0.10669565200805664},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.09402665495872498},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.07956159114837646},{"id":"https://openalex.org/C66938386","wikidata":"https://www.wikidata.org/wiki/Q633538","display_name":"Structural engineering","level":1,"score":0.050083667039871216}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/lra.2025.3572822","is_oa":false,"landing_page_url":"https://doi.org/10.1109/lra.2025.3572822","pdf_url":null,"source":{"id":"https://openalex.org/S4210169774","display_name":"IEEE Robotics and Automation Letters","issn_l":"2377-3766","issn":["2377-3766"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Robotics and Automation Letters","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G184828240","display_name":null,"funder_award_id":"62192751","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":38,"referenced_works":["https://openalex.org/W1974508623","https://openalex.org/W1993411524","https://openalex.org/W2158782408","https://openalex.org/W2811378406","https://openalex.org/W2951360122","https://openalex.org/W2963403593","https://openalex.org/W2963523627","https://openalex.org/W2997289589","https://openalex.org/W3004777436","https://openalex.org/W3037606473","https://openalex.org/W4393160216","https://openalex.org/W6681478256","https://openalex.org/W6684205842","https://openalex.org/W6684921986","https://openalex.org/W6692846177","https://openalex.org/W6713603661","https://openalex.org/W6717230150","https://openalex.org/W6734517396","https://openalex.org/W6743660412","https://openalex.org/W6747473740","https://openalex.org/W6748314335","https://openalex.org/W6755612348","https://openalex.org/W6756287877","https://openalex.org/W6756303580","https://openalex.org/W6757592117","https://openalex.org/W6758076299","https://openalex.org/W6758978475","https://openalex.org/W6759315145","https://openalex.org/W6762863188","https://openalex.org/W6769596995","https://openalex.org/W6771807793","https://openalex.org/W6776601253","https://openalex.org/W6790779738","https://openalex.org/W6800004206","https://openalex.org/W6801964084","https://openalex.org/W6803660280","https://openalex.org/W6804601995","https://openalex.org/W6853529480"],"related_works":["https://openalex.org/W4310083477","https://openalex.org/W2328553770","https://openalex.org/W2920061524","https://openalex.org/W1977959518","https://openalex.org/W2038908348","https://openalex.org/W2107890255","https://openalex.org/W2106552856","https://openalex.org/W2145821588","https://openalex.org/W2970690932","https://openalex.org/W2110715801"],"abstract_inverted_index":{"Entropy":[0],"regularization":[1,85],"is":[2],"widely":[3],"used":[4],"to":[5,26,47],"improve":[6,158],"policy":[7,42,83],"optimization":[8],"and":[9,21,74,81,152],"encourage":[10,44],"exploration":[11],"in":[12],"reinforcement":[13],"learning.":[14],"By":[15],"maximizing":[16],"both":[17],"the":[18,23,29,45,66,70,75,120,137,140,159],"expected":[19],"return":[20],"entropy,":[22],"agent":[24,46],"aims":[25],"succeed":[27],"at":[28],"task":[30],"while":[31],"acting":[32],"as":[33,35],"randomly":[34],"possible.":[36],"However,":[37],"current":[38,82],"methods":[39],"based":[40],"on":[41,147],"entropy":[43,73,80,84,113,134,138],"explore":[48],"diverse":[49,58],"actions,":[50],"but":[51],"they":[52],"do":[53],"not":[54],"directly":[55],"promote":[56],"exploring":[57],"states.":[59],"In":[60],"this":[61,89],"study,":[62],"we":[63,91],"theoretically":[64],"reveal":[65],"challenge":[67],"of":[68,139,161],"optimizing":[69,136],"agent's":[71],"next-state":[72,79,112,133],"gap":[76],"between":[77],"maximum":[78],"methods.":[86],"To":[87],"address":[88],"limitation,":[90],"introduce":[92],"<bold":[93,96,101],"xmlns:mml=\"http://www.w3.org/1998/Math/MathML\"":[94,97,99,102,105],"xmlns:xlink=\"http://www.w3.org/1999/xlink\">M</b>aximum":[95],"xmlns:xlink=\"http://www.w3.org/1999/xlink\">N</b>ext-<bold":[98],"xmlns:xlink=\"http://www.w3.org/1999/xlink\">S</b>tate":[100],"xmlns:xlink=\"http://www.w3.org/1999/xlink\">E</b>ntropy":[103],"(<bold":[104],"xmlns:xlink=\"http://www.w3.org/1999/xlink\">MNSE</b>),":[106],"a":[107,125],"novel":[108],"method":[109],"that":[110,129,154],"maximizes":[111],"through":[114],"an":[115],"action":[116],"mapping":[117],"layer":[118],"following":[119],"inner":[121,141],"policy.":[122,142],"We":[123,143],"provide":[124],"theoretical":[126],"analysis":[127],"demonstrating":[128],"MNSE":[130,155],"can":[131,156],"maximize":[132],"by":[135],"conduct":[144],"extensive":[145],"experiments":[146],"various":[148],"continuous":[149],"control":[150],"tasks":[151],"demonstrate":[153],"significantly":[157],"performance":[160],"RL":[162],"algorithms.":[163]},"counts_by_year":[],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
