{"id":"https://openalex.org/W4392979981","doi":"https://doi.org/10.1109/tnnls.2024.3376538","title":"MEOL: A Maximum-Entropy Framework for Options Learning","display_name":"MEOL: A Maximum-Entropy Framework for Options Learning","publication_year":2024,"publication_date":"2024-03-20","ids":{"openalex":"https://openalex.org/W4392979981","doi":"https://doi.org/10.1109/tnnls.2024.3376538","pmid":"https://pubmed.ncbi.nlm.nih.gov/38507376"},"language":"en","primary_location":{"id":"doi:10.1109/tnnls.2024.3376538","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tnnls.2024.3376538","pdf_url":null,"source":{"id":"https://openalex.org/S4210175523","display_name":"IEEE Transactions on Neural Networks and Learning Systems","issn_l":"2162-237X","issn":["2162-237X","2162-2388"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Neural Networks and Learning Systems","raw_type":"journal-article"},"type":"article","indexed_in":["crossref","pubmed"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5051662373","display_name":"Pin Zhang","orcid":"https://orcid.org/0000-0002-4730-7048"},"institutions":[{"id":"https://openalex.org/I4210104252","display_name":"Air Force Engineering University","ror":"https://ror.org/00seraz22","country_code":"CN","type":"education","lineage":["https://openalex.org/I4210104252"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Pin Zhang","raw_affiliation_strings":["College of Aeronautics Engineering, Air Force Engineering University, Xi&#x2019;an, China"],"raw_orcid":"https://orcid.org/0000-0002-4730-7048","affiliations":[{"raw_affiliation_string":"College of Aeronautics Engineering, Air Force Engineering University, Xi&#x2019;an, China","institution_ids":["https://openalex.org/I4210104252"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5059956025","display_name":"Wenhan Dong","orcid":"https://orcid.org/0000-0002-3076-6707"},"institutions":[{"id":"https://openalex.org/I4210104252","display_name":"Air Force Engineering University","ror":"https://ror.org/00seraz22","country_code":"CN","type":"education","lineage":["https://openalex.org/I4210104252"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Wenhan Dong","raw_affiliation_strings":["College of Aeronautics Engineering, Air Force Engineering University, Xi&#x2019;an, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"College of Aeronautics Engineering, Air Force Engineering University, Xi&#x2019;an, China","institution_ids":["https://openalex.org/I4210104252"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5102831142","display_name":"Ming Cai","orcid":"https://orcid.org/0000-0002-6039-5214"},"institutions":[{"id":"https://openalex.org/I4210104252","display_name":"Air Force Engineering University","ror":"https://ror.org/00seraz22","country_code":"CN","type":"education","lineage":["https://openalex.org/I4210104252"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Ming Cai","raw_affiliation_strings":["College of Aeronautics Engineering, Air Force Engineering University, Xi&#x2019;an, China"],"raw_orcid":"https://orcid.org/0000-0002-6039-5214","affiliations":[{"raw_affiliation_string":"College of Aeronautics Engineering, Air Force Engineering University, Xi&#x2019;an, China","institution_ids":["https://openalex.org/I4210104252"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5053733111","display_name":"Shengde Jia","orcid":"https://orcid.org/0000-0002-5334-3280"},"institutions":[{"id":"https://openalex.org/I170215575","display_name":"National University of Defense Technology","ror":"https://ror.org/05d2yfz11","country_code":"CN","type":"education","lineage":["https://openalex.org/I170215575"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Shengde Jia","raw_affiliation_strings":["College of Mechatronic Engineering and Automation, National University of Defense Technology, Changsha, China"],"raw_orcid":"https://orcid.org/0000-0002-5334-3280","affiliations":[{"raw_affiliation_string":"College of Mechatronic Engineering and Automation, National University of Defense Technology, Changsha, China","institution_ids":["https://openalex.org/I170215575"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5032334945","display_name":"Zipeng Wang","orcid":"https://orcid.org/0000-0002-6269-789X"},"institutions":[{"id":"https://openalex.org/I37796252","display_name":"Beijing University of Technology","ror":"https://ror.org/037b1pp87","country_code":"CN","type":"education","lineage":["https://openalex.org/I37796252"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Zi-Peng Wang","raw_affiliation_strings":["Faculty of Information Technology, Beijing Laboratory of Smart Environmental Protection, Beijing Key Laboratory of Computational Intelligence and Intelligent System, and Beijing Institute of Artificial Intelligence, Beijing University of Technology, Beijing, China","Beijing Key Laboratory of Computational Intelligence and Intelligent System, and Beijing Institute of Artificial Intelligence, Faculty of Information Technology, Beijing Laboratory of Smart Environmental Protection, Beijing University of Technology, Beijing, China"],"raw_orcid":"https://orcid.org/0000-0002-6269-789X","affiliations":[{"raw_affiliation_string":"Faculty of Information Technology, Beijing Laboratory of Smart Environmental Protection, Beijing Key Laboratory of Computational Intelligence and Intelligent System, and Beijing Institute of Artificial Intelligence, Beijing University of Technology, Beijing, China","institution_ids":["https://openalex.org/I37796252"]},{"raw_affiliation_string":"Beijing Key Laboratory of Computational Intelligence and Intelligent System, and Beijing Institute of Artificial Intelligence, Faculty of Information Technology, Beijing Laboratory of Smart Environmental Protection, Beijing University of Technology, Beijing, China","institution_ids":["https://openalex.org/I37796252"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.6109,"has_fulltext":false,"cited_by_count":2,"citation_normalized_percentile":{"value":0.71132854,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":95,"max":96},"biblio":{"volume":"36","issue":"3","first_page":"4834","last_page":"4848"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12794","display_name":"Adaptive Dynamic Programming Control","score":0.9970999956130981,"subfield":{"id":"https://openalex.org/subfields/1703","display_name":"Computational Theory and Mathematics"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12101","display_name":"Advanced Bandit Algorithms Research","score":0.9911999702453613,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7686502933502197},{"id":"https://openalex.org/keywords/inference","display_name":"Inference","score":0.5974122285842896},{"id":"https://openalex.org/keywords/probabilistic-logic","display_name":"Probabilistic logic","score":0.5287373065948486},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.5058281421661377},{"id":"https://openalex.org/keywords/entropy","display_name":"Entropy (arrow of time)","score":0.48222100734710693},{"id":"https://openalex.org/keywords/maximization","display_name":"Maximization","score":0.44849586486816406},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.4367522597312927},{"id":"https://openalex.org/keywords/robustness","display_name":"Robustness (evolution)","score":0.41687268018722534},{"id":"https://openalex.org/keywords/mathematical-optimization","display_name":"Mathematical optimization","score":0.31370264291763306},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.10602834820747375}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7686502933502197},{"id":"https://openalex.org/C2776214188","wikidata":"https://www.wikidata.org/wiki/Q408386","display_name":"Inference","level":2,"score":0.5974122285842896},{"id":"https://openalex.org/C49937458","wikidata":"https://www.wikidata.org/wiki/Q2599292","display_name":"Probabilistic logic","level":2,"score":0.5287373065948486},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.5058281421661377},{"id":"https://openalex.org/C106301342","wikidata":"https://www.wikidata.org/wiki/Q4117933","display_name":"Entropy (arrow of time)","level":2,"score":0.48222100734710693},{"id":"https://openalex.org/C2776330181","wikidata":"https://www.wikidata.org/wiki/Q18358244","display_name":"Maximization","level":2,"score":0.44849586486816406},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4367522597312927},{"id":"https://openalex.org/C63479239","wikidata":"https://www.wikidata.org/wiki/Q7353546","display_name":"Robustness (evolution)","level":3,"score":0.41687268018722534},{"id":"https://openalex.org/C126255220","wikidata":"https://www.wikidata.org/wiki/Q141495","display_name":"Mathematical optimization","level":1,"score":0.31370264291763306},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.10602834820747375},{"id":"https://openalex.org/C55493867","wikidata":"https://www.wikidata.org/wiki/Q7094","display_name":"Biochemistry","level":1,"score":0.0},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C185592680","wikidata":"https://www.wikidata.org/wiki/Q2329","display_name":"Chemistry","level":0,"score":0.0},{"id":"https://openalex.org/C104317684","wikidata":"https://www.wikidata.org/wiki/Q7187","display_name":"Gene","level":2,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1109/tnnls.2024.3376538","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tnnls.2024.3376538","pdf_url":null,"source":{"id":"https://openalex.org/S4210175523","display_name":"IEEE Transactions on Neural Networks and Learning Systems","issn_l":"2162-237X","issn":["2162-237X","2162-2388"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Neural Networks and Learning Systems","raw_type":"journal-article"},{"id":"pmid:38507376","is_oa":false,"landing_page_url":"https://pubmed.ncbi.nlm.nih.gov/38507376","pdf_url":null,"source":{"id":"https://openalex.org/S4306525036","display_name":"PubMed","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1299303238","host_organization_name":"National Institutes of Health","host_organization_lineage":["https://openalex.org/I1299303238"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE transactions on neural networks and learning systems","raw_type":null}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G3233168396","display_name":null,"funder_award_id":"61806217","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":51,"referenced_works":["https://openalex.org/W1568042657","https://openalex.org/W2086699924","https://openalex.org/W2109910161","https://openalex.org/W2148439597","https://openalex.org/W2155772159","https://openalex.org/W2155968351","https://openalex.org/W2168640731","https://openalex.org/W2334782222","https://openalex.org/W2479936045","https://openalex.org/W2498991332","https://openalex.org/W2580629550","https://openalex.org/W2585299106","https://openalex.org/W2736601468","https://openalex.org/W2771734675","https://openalex.org/W2799151646","https://openalex.org/W2942034515","https://openalex.org/W3046093665","https://openalex.org/W3097732461","https://openalex.org/W3187550742","https://openalex.org/W3193590406","https://openalex.org/W4214717370","https://openalex.org/W4298857966","https://openalex.org/W4307230233","https://openalex.org/W6629152578","https://openalex.org/W6629438869","https://openalex.org/W6633381923","https://openalex.org/W6634137646","https://openalex.org/W6635628852","https://openalex.org/W6637967152","https://openalex.org/W6676557315","https://openalex.org/W6678610762","https://openalex.org/W6680976742","https://openalex.org/W6681478256","https://openalex.org/W6682802936","https://openalex.org/W6683443546","https://openalex.org/W6683821272","https://openalex.org/W6704571135","https://openalex.org/W6713989974","https://openalex.org/W6727349600","https://openalex.org/W6734517396","https://openalex.org/W6741002519","https://openalex.org/W6741200461","https://openalex.org/W6744597842","https://openalex.org/W6746404916","https://openalex.org/W6747473740","https://openalex.org/W6748425962","https://openalex.org/W6750186571","https://openalex.org/W6766861165","https://openalex.org/W6785386017","https://openalex.org/W6803978876","https://openalex.org/W7071570919"],"related_works":["https://openalex.org/W2770593030","https://openalex.org/W2055243143","https://openalex.org/W2943623134","https://openalex.org/W2494523064","https://openalex.org/W3154990682","https://openalex.org/W2560201613","https://openalex.org/W2215759665","https://openalex.org/W2030292806","https://openalex.org/W2171975302","https://openalex.org/W2163814182"],"abstract_inverted_index":{"Options,":[0],"the":[1,99,109,140],"temporally":[2],"extended":[3],"courses":[4],"of":[5,24,32,60,85,119],"actions":[6,45],"that":[7,126,154],"can":[8],"be":[9],"taken":[10],"at":[11],"varying":[12],"time":[13],"scale,":[14],"have":[15],"provided":[16],"a":[17,90],"concrete,":[18],"key":[19],"framework":[20],"for":[21],"learning":[22,33,82,161],"levels":[23],"temporal":[25],"abstraction":[26],"in":[27,67,131,169],"hierarchical":[28,158],"tasks.":[29,149],"While":[30],"methods":[31],"options":[34,43,68,81,92,111,165],"end-to-end":[35],"is":[36,47],"well":[37],"researched,":[38],"how":[39],"to":[40,95,98],"explore":[41],"good":[42],"and":[44,63,88,114,133,139,144,167],"simultaneously":[46],"still":[48],"challenging.":[49],"We":[50],"address":[51],"this":[52,71],"issue":[53],"by":[54,79],"maximizing":[55],"reward":[56],"augmented":[57],"with":[58],"entropies":[59],"both":[61],"option":[62],"action":[64,170],"selection":[65],"policy":[66],"learning.":[69],"To":[70],"end,":[72],"we":[73,103],"reveal":[74],"our":[75,127],"novel":[76],"optimization":[77],"objective":[78],"reformulating":[80],"from":[83],"perspective":[84],"probabilistic":[86],"inference":[87],"propose":[89,104],"soft":[91],"iteration":[93],"method":[94,128],"guarantee":[96],"convergence":[97],"optimum.":[100],"In":[101],"implementation,":[102],"an":[105],"off-policy":[106],"algorithm":[107],"called":[108],"maximum-entropy":[110],"critic":[112],"(MEOC)":[113],"evaluate":[115],"it":[116],"on":[117,136,147,157],"series":[118],"continuous":[120],"control":[121],"benchmarks.":[122],"Comparative":[123],"results":[124],"demonstrate":[125],"outperforms":[129],"baselines":[130],"efficiency":[132],"final":[134],"result":[135],"most":[137],"benchmarks,":[138],"performance":[141,162],"exhibits":[142],"superiority":[143],"robustness":[145],"especially":[146],"complex":[148],"Ablated":[150],"studies":[151],"further":[152],"explain":[153],"entropy":[155],"maximization":[156],"exploration":[159],"promotes":[160],"through":[163],"efficient":[164],"specialization":[166],"multimodality":[168],"level.":[171]},"counts_by_year":[{"year":2025,"cited_by_count":2}],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2025-10-10T00:00:00"}
