{"id":"https://openalex.org/W2802349643","doi":"https://doi.org/10.1109/access.2018.2854283","title":"A Deep Hierarchical Reinforcement Learning Algorithm in Partially Observable Markov Decision Processes","display_name":"A Deep Hierarchical Reinforcement Learning Algorithm in Partially Observable Markov Decision Processes","publication_year":2018,"publication_date":"2018-01-01","ids":{"openalex":"https://openalex.org/W2802349643","doi":"https://doi.org/10.1109/access.2018.2854283","mag":"2802349643"},"language":"en","primary_location":{"id":"doi:10.1109/access.2018.2854283","is_oa":true,"landing_page_url":"https://doi.org/10.1109/access.2018.2854283","pdf_url":null,"source":{"id":"https://openalex.org/S2485537415","display_name":"IEEE Access","issn_l":"2169-3536","issn":["2169-3536"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Access","raw_type":"journal-article"},"type":"article","indexed_in":["arxiv","crossref","doaj"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://doi.org/10.1109/access.2018.2854283","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5034496104","display_name":"Tuyen P. Le","orcid":"https://orcid.org/0000-0002-1345-2650"},"institutions":[{"id":"https://openalex.org/I35928602","display_name":"Kyung Hee University","ror":"https://ror.org/01zqcg218","country_code":"KR","type":"education","lineage":["https://openalex.org/I35928602"]}],"countries":["KR"],"is_corresponding":false,"raw_author_name":"Tuyen P. Le","raw_affiliation_strings":["Artificial Intelligence Laboratory, Kyung Hee University, Global Campus, Yongin, South Korea"],"raw_orcid":"https://orcid.org/0000-0002-1345-2650","affiliations":[{"raw_affiliation_string":"Artificial Intelligence Laboratory, Kyung Hee University, Global Campus, Yongin, South Korea","institution_ids":["https://openalex.org/I35928602"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5043775193","display_name":"Ngo Anh Vien","orcid":"https://orcid.org/0000-0001-9646-267X"},"institutions":[{"id":"https://openalex.org/I126231945","display_name":"Queen's University Belfast","ror":"https://ror.org/00hswnk62","country_code":"GB","type":"education","lineage":["https://openalex.org/I126231945"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Ngo Anh Vien","raw_affiliation_strings":["EEECS/ECIT, Queen\u2019s University Belfast, Belfast, U.K","EEECS/ECIT, Queen's University Belfast, Belfast, U.K"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"EEECS/ECIT, Queen\u2019s University Belfast, Belfast, U.K","institution_ids":["https://openalex.org/I126231945"]},{"raw_affiliation_string":"EEECS/ECIT, Queen's University Belfast, Belfast, U.K","institution_ids":["https://openalex.org/I126231945"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5112330829","display_name":"TaeChoong Chung","orcid":null},"institutions":[{"id":"https://openalex.org/I35928602","display_name":"Kyung Hee University","ror":"https://ror.org/01zqcg218","country_code":"KR","type":"education","lineage":["https://openalex.org/I35928602"]}],"countries":["KR"],"is_corresponding":false,"raw_author_name":"TaeChoong Chung","raw_affiliation_strings":["Artificial Intelligence Laboratory, Kyung Hee University, Global Campus, Yongin, South Korea"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Artificial Intelligence Laboratory, Kyung Hee University, Global Campus, Yongin, South Korea","institution_ids":["https://openalex.org/I35928602"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":3,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":{"value":1850,"currency":"USD","value_usd":1850},"apc_paid":{"value":1850,"currency":"USD","value_usd":1850},"fwci":5.2376,"has_fulltext":true,"cited_by_count":60,"citation_normalized_percentile":{"value":0.96320885,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":90,"max":99},"biblio":{"volume":"6","issue":null,"first_page":"49089","last_page":"49102"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9994999766349792,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9994999766349792,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10876","display_name":"Fault Detection and Control Systems","score":0.9815000295639038,"subfield":{"id":"https://openalex.org/subfields/2207","display_name":"Control and Systems Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12761","display_name":"Data Stream Mining Techniques","score":0.9783999919891357,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.911853551864624},{"id":"https://openalex.org/keywords/partially-observable-markov-decision-process","display_name":"Partially observable Markov decision process","score":0.8862701654434204},{"id":"https://openalex.org/keywords/observability","display_name":"Observability","score":0.8046191930770874},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7405157089233398},{"id":"https://openalex.org/keywords/markov-decision-process","display_name":"Markov decision process","score":0.6800253391265869},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.6766430139541626},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.49843454360961914},{"id":"https://openalex.org/keywords/observable","display_name":"Observable","score":0.4459618926048279},{"id":"https://openalex.org/keywords/markov-process","display_name":"Markov process","score":0.39734792709350586},{"id":"https://openalex.org/keywords/markov-chain","display_name":"Markov chain","score":0.34271439909935},{"id":"https://openalex.org/keywords/markov-model","display_name":"Markov model","score":0.28695791959762573},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.1224890649318695}],"concepts":[{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.911853551864624},{"id":"https://openalex.org/C17098449","wikidata":"https://www.wikidata.org/wiki/Q176814","display_name":"Partially observable Markov decision process","level":4,"score":0.8862701654434204},{"id":"https://openalex.org/C36299963","wikidata":"https://www.wikidata.org/wiki/Q1369844","display_name":"Observability","level":2,"score":0.8046191930770874},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7405157089233398},{"id":"https://openalex.org/C106189395","wikidata":"https://www.wikidata.org/wiki/Q176789","display_name":"Markov decision process","level":3,"score":0.6800253391265869},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6766430139541626},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.49843454360961914},{"id":"https://openalex.org/C32848918","wikidata":"https://www.wikidata.org/wiki/Q845789","display_name":"Observable","level":2,"score":0.4459618926048279},{"id":"https://openalex.org/C159886148","wikidata":"https://www.wikidata.org/wiki/Q176645","display_name":"Markov process","level":2,"score":0.39734792709350586},{"id":"https://openalex.org/C98763669","wikidata":"https://www.wikidata.org/wiki/Q176645","display_name":"Markov chain","level":2,"score":0.34271439909935},{"id":"https://openalex.org/C163836022","wikidata":"https://www.wikidata.org/wiki/Q6771326","display_name":"Markov model","level":3,"score":0.28695791959762573},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.1224890649318695},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C105795698","wikidata":"https://www.wikidata.org/wiki/Q12483","display_name":"Statistics","level":1,"score":0.0},{"id":"https://openalex.org/C28826006","wikidata":"https://www.wikidata.org/wiki/Q33521","display_name":"Applied mathematics","level":1,"score":0.0}],"mesh":[],"locations_count":4,"locations":[{"id":"doi:10.1109/access.2018.2854283","is_oa":true,"landing_page_url":"https://doi.org/10.1109/access.2018.2854283","pdf_url":null,"source":{"id":"https://openalex.org/S2485537415","display_name":"IEEE Access","issn_l":"2169-3536","issn":["2169-3536"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Access","raw_type":"journal-article"},{"id":"pmh:oai:arXiv.org:1805.04419","is_oa":true,"landing_page_url":"http://arxiv.org/abs/1805.04419","pdf_url":"https://arxiv.org/pdf/1805.04419","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},{"id":"pmh:oai:pure.qub.ac.uk/portal:publications/02dde14f-28df-4d24-9b42-b904fa302c37","is_oa":true,"landing_page_url":"https://pure.qub.ac.uk/en/publications/02dde14f-28df-4d24-9b42-b904fa302c37","pdf_url":"https://pureadmin.qub.ac.uk/ws/files/160058498/DeepHierarch.pdf","source":{"id":"https://openalex.org/S4306402319","display_name":"Research Portal (Queen's University Belfast)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I126231945","host_organization_name":"Queen's University Belfast","host_organization_lineage":["https://openalex.org/I126231945"],"host_organization_lineage_names":[],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Le , T P , Ngo , V , Layek , M A &amp; Chung , T 2018 , ' Deep Hierarchical Reinforcement Learning Algorithm in Partially Observable Markov Decision Processes ' , IEEE Access , vol. 6 , pp. 49089 - 49102 . https://doi.org/10.1109/ACCESS.2018.2854283","raw_type":"article"},{"id":"pmh:oai:doaj.org/article:3be1a47d41c84d2996029d49b53536eb","is_oa":true,"landing_page_url":"https://doaj.org/article/3be1a47d41c84d2996029d49b53536eb","pdf_url":null,"source":{"id":"https://openalex.org/S4306401280","display_name":"DOAJ (DOAJ: Directory of Open Access Journals)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by-sa","license_id":"https://openalex.org/licenses/cc-by-sa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"IEEE Access, Vol 6, Pp 49089-49102 (2018)","raw_type":"article"}],"best_oa_location":{"id":"doi:10.1109/access.2018.2854283","is_oa":true,"landing_page_url":"https://doi.org/10.1109/access.2018.2854283","pdf_url":null,"source":{"id":"https://openalex.org/S2485537415","display_name":"IEEE Access","issn_l":"2169-3536","issn":["2169-3536"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Access","raw_type":"journal-article"},"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/16","display_name":"Peace, Justice and strong institutions","score":0.75}],"awards":[{"id":"https://openalex.org/G2905686298","display_name":null,"funder_award_id":"NRF-2017R1D1A1B04036354","funder_id":"https://openalex.org/F4320322120","funder_display_name":"National Research Foundation of Korea"}],"funders":[{"id":"https://openalex.org/F4320320671","display_name":"National Research Foundation","ror":"https://ror.org/05s0g1g46"},{"id":"https://openalex.org/F4320322120","display_name":"National Research Foundation of Korea","ror":"https://ror.org/013aysd81"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":90,"referenced_works":["https://openalex.org/W16011919","https://openalex.org/W297984285","https://openalex.org/W1486707268","https://openalex.org/W1491302875","https://openalex.org/W1505837856","https://openalex.org/W1515851193","https://openalex.org/W1522301498","https://openalex.org/W1568042657","https://openalex.org/W1590744975","https://openalex.org/W1592847719","https://openalex.org/W1757796397","https://openalex.org/W1771410628","https://openalex.org/W1914583973","https://openalex.org/W1977655452","https://openalex.org/W2012587148","https://openalex.org/W2034806191","https://openalex.org/W2049783975","https://openalex.org/W2080039641","https://openalex.org/W2098172344","https://openalex.org/W2101355568","https://openalex.org/W2109910161","https://openalex.org/W2118490768","https://openalex.org/W2121092017","https://openalex.org/W2121517924","https://openalex.org/W2121616705","https://openalex.org/W2127107099","https://openalex.org/W2144558232","https://openalex.org/W2145339207","https://openalex.org/W2155027007","https://openalex.org/W2155968351","https://openalex.org/W2156737235","https://openalex.org/W2158548602","https://openalex.org/W2160371091","https://openalex.org/W2164424353","https://openalex.org/W2168359464","https://openalex.org/W2169015875","https://openalex.org/W2170899200","https://openalex.org/W2172968643","https://openalex.org/W2173248099","https://openalex.org/W2173564293","https://openalex.org/W2174072647","https://openalex.org/W2174196774","https://openalex.org/W2201581102","https://openalex.org/W2257979135","https://openalex.org/W2271840356","https://openalex.org/W2277639078","https://openalex.org/W2335959470","https://openalex.org/W2344556769","https://openalex.org/W2344786740","https://openalex.org/W2419612459","https://openalex.org/W2428834750","https://openalex.org/W2522489477","https://openalex.org/W2523728418","https://openalex.org/W2596111687","https://openalex.org/W2605016475","https://openalex.org/W2746553466","https://openalex.org/W2949267040","https://openalex.org/W2949608212","https://openalex.org/W2950471160","https://openalex.org/W2951799221","https://openalex.org/W2962730405","https://openalex.org/W2962938178","https://openalex.org/W2963262099","https://openalex.org/W2963276097","https://openalex.org/W2963864421","https://openalex.org/W2964227312","https://openalex.org/W3037207827","https://openalex.org/W4205513846","https://openalex.org/W4211089519","https://openalex.org/W4295184135","https://openalex.org/W4295313133","https://openalex.org/W4298857966","https://openalex.org/W6610758747","https://openalex.org/W6628902087","https://openalex.org/W6629325184","https://openalex.org/W6634137646","https://openalex.org/W6638018090","https://openalex.org/W6677939520","https://openalex.org/W6683195989","https://openalex.org/W6683204974","https://openalex.org/W6683443546","https://openalex.org/W6683821272","https://openalex.org/W6685200453","https://openalex.org/W6685757253","https://openalex.org/W6696324988","https://openalex.org/W6703271639","https://openalex.org/W6717230150","https://openalex.org/W6727252785","https://openalex.org/W6727349600","https://openalex.org/W7054892219"],"related_works":["https://openalex.org/W2999848267","https://openalex.org/W2096013579","https://openalex.org/W52153049","https://openalex.org/W1760611253","https://openalex.org/W1515117609","https://openalex.org/W1589140671","https://openalex.org/W4323315247","https://openalex.org/W131709709","https://openalex.org/W2294884454","https://openalex.org/W3169161914"],"abstract_inverted_index":{"In":[0,110],"recent":[1],"years,":[2],"reinforcement":[3,32,58,137],"learning":[4,17,33,39,59,138,141],"(RL)":[5],"has":[6],"achieved":[7],"remarkable":[8],"success":[9],"due":[10],"to":[11,46,154],"the":[12,20,71,122,162],"growing":[13],"adoption":[14],"of":[15,23,54,94],"deep":[16,136,146],"techniques":[18],"and":[19,40,89,128,157],"rapid":[21],"growth":[22],"computing":[24],"power.":[25],"Nevertheless,":[26],"it":[27],"is":[28,60,150],"well-known":[29],"that":[30,64],"flat":[31],"algorithms":[34],"are":[35,41,86],"often":[36,87],"have":[37,78,124],"trouble":[38],"even":[42],"data-efficient":[43],"with":[44],"respect":[45],"tasks":[47,76,123],"having":[48],"hierarchical":[49,115,130,135,143,147,168],"structures,":[50],"e.g.,":[51],"those":[52],"consisting":[53],"multiple":[55],"subtasks.":[56],"Hierarchical":[57],"a":[61,103,118,134],"principled":[62],"approach":[63,139],"can":[65,99],"tackle":[66],"such":[67,97],"challenging":[68,167],"tasks.":[69],"On":[70],"other":[72],"hand,":[73],"many":[74],"real-world":[75],"usually":[77],"only":[79,125],"partial":[80,126],"observability":[81,127],"in":[82,96,117,120,142],"which":[83,121],"state":[84],"measurements":[85],"imperfect":[88],"partially":[90,104],"observable.":[91],"The":[92,145],"problems":[93],"RL":[95,116,148],"settings":[98],"be":[100],"formulated":[101],"as":[102],"observable":[105],"Markov":[106],"decision":[107],"process":[108],"(POMDP).":[109],"this":[111],"paper,":[112],"we":[113],"study":[114],"POMDP":[119,158],"possess":[129],"properties.":[131],"We":[132,160],"propose":[133],"for":[140,152],"POMDP.":[144],"algorithm":[149,164],"proposed":[151,163],"domains":[153],"both":[155],"MDP":[156],"learning.":[159],"evaluate":[161],"using":[165],"various":[166],"POMDPs.":[169]},"counts_by_year":[{"year":2026,"cited_by_count":2},{"year":2025,"cited_by_count":3},{"year":2024,"cited_by_count":8},{"year":2023,"cited_by_count":10},{"year":2022,"cited_by_count":6},{"year":2021,"cited_by_count":10},{"year":2020,"cited_by_count":8},{"year":2019,"cited_by_count":12},{"year":2018,"cited_by_count":1}],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2018-05-17T00:00:00"}
