{"id":"https://openalex.org/W4391454473","doi":"https://doi.org/10.1109/tnnls.2024.3354061","title":"Goal-Conditioned Hierarchical Reinforcement Learning With High-Level Model Approximation","display_name":"Goal-Conditioned Hierarchical Reinforcement Learning With High-Level Model Approximation","publication_year":2024,"publication_date":"2024-02-01","ids":{"openalex":"https://openalex.org/W4391454473","doi":"https://doi.org/10.1109/tnnls.2024.3354061","pmid":"https://pubmed.ncbi.nlm.nih.gov/38300770"},"language":"en","primary_location":{"id":"doi:10.1109/tnnls.2024.3354061","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tnnls.2024.3354061","pdf_url":null,"source":{"id":"https://openalex.org/S4210175523","display_name":"IEEE Transactions on Neural Networks and Learning Systems","issn_l":"2162-237X","issn":["2162-237X","2162-2388"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Neural Networks and Learning Systems","raw_type":"journal-article"},"type":"article","indexed_in":["crossref","pubmed"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5100440778","display_name":"Yu Luo","orcid":"https://orcid.org/0000-0001-6229-4639"},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Yu Luo","raw_affiliation_strings":["Department of Computer Science and Technology, Tsinghua University, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Department of Computer Science and Technology, Tsinghua University, Beijing, China","institution_ids":["https://openalex.org/I99065089"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5079511023","display_name":"Tianying Ji","orcid":"https://orcid.org/0009-0006-8949-5880"},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Tianying Ji","raw_affiliation_strings":["Department of Computer Science and Technology, Tsinghua University, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Department of Computer Science and Technology, Tsinghua University, Beijing, China","institution_ids":["https://openalex.org/I99065089"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5055546056","display_name":"Fuchun Sun","orcid":"https://orcid.org/0000-0003-3546-6305"},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Fuchun Sun","raw_affiliation_strings":["Department of Computer Science and Technology, Tsinghua University, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Department of Computer Science and Technology, Tsinghua University, Beijing, China","institution_ids":["https://openalex.org/I99065089"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5041101317","display_name":"Huaping Liu","orcid":"https://orcid.org/0000-0002-4042-6044"},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Huaping Liu","raw_affiliation_strings":["Department of Computer Science and Technology, Tsinghua University, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Department of Computer Science and Technology, Tsinghua University, Beijing, China","institution_ids":["https://openalex.org/I99065089"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100326970","display_name":"Jianwei Zhang","orcid":"https://orcid.org/0000-0002-7856-5760"},"institutions":[{"id":"https://openalex.org/I159176309","display_name":"Universit\u00e4t Hamburg","ror":"https://ror.org/00g30e956","country_code":"DE","type":"education","lineage":["https://openalex.org/I159176309"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Jianwei Zhang","raw_affiliation_strings":["Department of Informatics, University of Hamburg, Hamburg, Germany"],"affiliations":[{"raw_affiliation_string":"Department of Informatics, University of Hamburg, Hamburg, Germany","institution_ids":["https://openalex.org/I159176309"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5045074471","display_name":"Mingxuan Jing","orcid":"https://orcid.org/0009-0009-4335-9455"},"institutions":[{"id":"https://openalex.org/I19820366","display_name":"Chinese Academy of Sciences","ror":"https://ror.org/034t30j35","country_code":"CN","type":"funder","lineage":["https://openalex.org/I19820366"]},{"id":"https://openalex.org/I4210128818","display_name":"Institute of Software","ror":"https://ror.org/033dfsn42","country_code":"CN","type":"facility","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210128818"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Mingxuan Jing","raw_affiliation_strings":["Science and Technology on Integrated Information System Laboratory, Institute of Software, Chinese Academy of Sciences, Beijing, China","Institute of Software, Science and Technology on Integrated Information System Laboratory, Chinese Academy of Sciences, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Science and Technology on Integrated Information System Laboratory, Institute of Software, Chinese Academy of Sciences, Beijing, China","institution_ids":["https://openalex.org/I4210128818","https://openalex.org/I19820366"]},{"raw_affiliation_string":"Institute of Software, Science and Technology on Integrated Information System Laboratory, Chinese Academy of Sciences, Beijing, China","institution_ids":["https://openalex.org/I4210128818","https://openalex.org/I19820366"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5032642601","display_name":"Wenbing Huang","orcid":"https://orcid.org/0000-0002-2566-4159"},"institutions":[{"id":"https://openalex.org/I78988378","display_name":"Renmin University of China","ror":"https://ror.org/041pakw92","country_code":"CN","type":"education","lineage":["https://openalex.org/I78988378"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Wenbing Huang","raw_affiliation_strings":["Gaoling School of Artificial Intelligence, Renmin University of China, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Gaoling School of Artificial Intelligence, Renmin University of China, Beijing, China","institution_ids":["https://openalex.org/I78988378"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":7,"corresponding_author_ids":["https://openalex.org/A5100440778"],"corresponding_institution_ids":["https://openalex.org/I99065089"],"apc_list":null,"apc_paid":null,"fwci":5.0765,"has_fulltext":false,"cited_by_count":14,"citation_normalized_percentile":{"value":0.95701414,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":97,"max":99},"biblio":{"volume":"36","issue":"2","first_page":"2705","last_page":"2719"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12794","display_name":"Adaptive Dynamic Programming Control","score":0.9901000261306763,"subfield":{"id":"https://openalex.org/subfields/1703","display_name":"Computational Theory and Mathematics"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11206","display_name":"Model Reduction and Neural Networks","score":0.9887999892234802,"subfield":{"id":"https://openalex.org/subfields/3109","display_name":"Statistical and Nonlinear Physics"},"field":{"id":"https://openalex.org/fields/31","display_name":"Physics and Astronomy"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/leverage","display_name":"Leverage (statistics)","score":0.7586878538131714},{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.7454278469085693},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.5907501578330994},{"id":"https://openalex.org/keywords/convergence","display_name":"Convergence (economics)","score":0.5303685665130615},{"id":"https://openalex.org/keywords/component","display_name":"Component (thermodynamics)","score":0.4988996982574463},{"id":"https://openalex.org/keywords/regret","display_name":"Regret","score":0.4969468414783478},{"id":"https://openalex.org/keywords/controller","display_name":"Controller (irrigation)","score":0.4859527349472046},{"id":"https://openalex.org/keywords/implementation","display_name":"Implementation","score":0.41574180126190186},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.3916274607181549},{"id":"https://openalex.org/keywords/mathematical-optimization","display_name":"Mathematical optimization","score":0.36903059482574463},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.31802064180374146},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.23617026209831238}],"concepts":[{"id":"https://openalex.org/C153083717","wikidata":"https://www.wikidata.org/wiki/Q6535263","display_name":"Leverage (statistics)","level":2,"score":0.7586878538131714},{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.7454278469085693},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.5907501578330994},{"id":"https://openalex.org/C2777303404","wikidata":"https://www.wikidata.org/wiki/Q759757","display_name":"Convergence (economics)","level":2,"score":0.5303685665130615},{"id":"https://openalex.org/C168167062","wikidata":"https://www.wikidata.org/wiki/Q1117970","display_name":"Component (thermodynamics)","level":2,"score":0.4988996982574463},{"id":"https://openalex.org/C50817715","wikidata":"https://www.wikidata.org/wiki/Q79895177","display_name":"Regret","level":2,"score":0.4969468414783478},{"id":"https://openalex.org/C203479927","wikidata":"https://www.wikidata.org/wiki/Q5165939","display_name":"Controller (irrigation)","level":2,"score":0.4859527349472046},{"id":"https://openalex.org/C26713055","wikidata":"https://www.wikidata.org/wiki/Q245962","display_name":"Implementation","level":2,"score":0.41574180126190186},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.3916274607181549},{"id":"https://openalex.org/C126255220","wikidata":"https://www.wikidata.org/wiki/Q141495","display_name":"Mathematical optimization","level":1,"score":0.36903059482574463},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.31802064180374146},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.23617026209831238},{"id":"https://openalex.org/C6557445","wikidata":"https://www.wikidata.org/wiki/Q173113","display_name":"Agronomy","level":1,"score":0.0},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0},{"id":"https://openalex.org/C97355855","wikidata":"https://www.wikidata.org/wiki/Q11473","display_name":"Thermodynamics","level":1,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0},{"id":"https://openalex.org/C50522688","wikidata":"https://www.wikidata.org/wiki/Q189833","display_name":"Economic growth","level":1,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1109/tnnls.2024.3354061","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tnnls.2024.3354061","pdf_url":null,"source":{"id":"https://openalex.org/S4210175523","display_name":"IEEE Transactions on Neural Networks and Learning Systems","issn_l":"2162-237X","issn":["2162-237X","2162-2388"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Neural Networks and Learning Systems","raw_type":"journal-article"},{"id":"pmid:38300770","is_oa":false,"landing_page_url":"https://pubmed.ncbi.nlm.nih.gov/38300770","pdf_url":null,"source":{"id":"https://openalex.org/S4306525036","display_name":"PubMed","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1299303238","host_organization_name":"National Institutes of Health","host_organization_lineage":["https://openalex.org/I1299303238"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE transactions on neural networks and learning systems","raw_type":null}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G8771408983","display_name":null,"funder_award_id":"2018AAA0102900","funder_id":"https://openalex.org/F4320321540","funder_display_name":"Ministry of Science and Technology of the People's Republic of China"}],"funders":[{"id":"https://openalex.org/F4320321540","display_name":"Ministry of Science and Technology of the People's Republic of China","ror":"https://ror.org/027s68j25"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":65,"referenced_works":["https://openalex.org/W1522301498","https://openalex.org/W1592847719","https://openalex.org/W1977655452","https://openalex.org/W1989517777","https://openalex.org/W2086932416","https://openalex.org/W2109910161","https://openalex.org/W2145339207","https://openalex.org/W2584986912","https://openalex.org/W2736601468","https://openalex.org/W2907916537","https://openalex.org/W2962750014","https://openalex.org/W2962872206","https://openalex.org/W2963439114","https://openalex.org/W2963761387","https://openalex.org/W2964227312","https://openalex.org/W2986779468","https://openalex.org/W2988685755","https://openalex.org/W3020125231","https://openalex.org/W3134939669","https://openalex.org/W3168892396","https://openalex.org/W3174406242","https://openalex.org/W3174733757","https://openalex.org/W3208890675","https://openalex.org/W3214001120","https://openalex.org/W4214717370","https://openalex.org/W4281550413","https://openalex.org/W4285818475","https://openalex.org/W4386702747","https://openalex.org/W6631190155","https://openalex.org/W6638018090","https://openalex.org/W6639039794","https://openalex.org/W6675999342","https://openalex.org/W6676833704","https://openalex.org/W6683821272","https://openalex.org/W6684205842","https://openalex.org/W6704571135","https://openalex.org/W6717230150","https://openalex.org/W6734215269","https://openalex.org/W6741002519","https://openalex.org/W6747473740","https://openalex.org/W6748012927","https://openalex.org/W6748519856","https://openalex.org/W6748839928","https://openalex.org/W6748848655","https://openalex.org/W6752089545","https://openalex.org/W6754302822","https://openalex.org/W6759871227","https://openalex.org/W6763329203","https://openalex.org/W6764088478","https://openalex.org/W6764173040","https://openalex.org/W6767997047","https://openalex.org/W6768220214","https://openalex.org/W6769500229","https://openalex.org/W6773937831","https://openalex.org/W6779715229","https://openalex.org/W6782754186","https://openalex.org/W6786111744","https://openalex.org/W6790486821","https://openalex.org/W6790970192","https://openalex.org/W6791474689","https://openalex.org/W6796254985","https://openalex.org/W6797805403","https://openalex.org/W6802965437","https://openalex.org/W6803726439","https://openalex.org/W6810588314"],"related_works":["https://openalex.org/W2971351794","https://openalex.org/W4376155396","https://openalex.org/W1947085858","https://openalex.org/W2174986909","https://openalex.org/W2527791220","https://openalex.org/W2101991911","https://openalex.org/W2155070487","https://openalex.org/W1925875298","https://openalex.org/W4292701710","https://openalex.org/W2768698792"],"abstract_inverted_index":{"Hierarchical":[0],"reinforcement":[1],"learning":[2],"(HRL)":[3],"exhibits":[4],"remarkable":[5],"potential":[6],"in":[7,81,175],"addressing":[8],"large-scale":[9],"and":[10,38,60,146,161,172,180],"long-horizon":[11],"complex":[12,159],"tasks.":[13],"However,":[14],"a":[15,48,65,82,114],"fundamental":[16],"challenge,":[17],"which":[18],"arises":[19],"from":[20],"the":[21,35,75,79,87,90,94,101,123,138,189],"inherently":[22],"entangled":[23],"nature":[24],"of":[25,41,78,89,93,104,125,141,177,191,194],"hierarchical":[26],"policies,":[27],"has":[28],"not":[29],"been":[30],"understood":[31],"well,":[32],"consequently":[33],"compromising":[34],"training":[36],"stability":[37],"exploration":[39],"efficiency":[40,179],"HRL.":[42],"In":[43,63,183],"this":[44,133],"article,":[45],"we":[46,99,136],"propose":[47],"novel":[49],"HRL":[50,173],"algorithm,":[51],"high-level":[52,70],"model":[53,72],"approximation":[54],"(HLMA),":[55],"presenting":[56],"both":[57],"theoretical":[58],"foundations":[59],"practical":[61],"implementations.":[62],"HLMA,":[64],"Planner":[66],"constructs":[67],"an":[68],"innovative":[69],"dynamic":[71],"to":[73,151],"predict":[74],"-step":[76],"transition":[77],"Controller":[80,118],"subtask.":[83],"This":[84,120],"allows":[85],"for":[86],"estimation":[88],"evolving":[91],"performance":[92],"Controller.":[95],"At":[96],"low":[97],"level,":[98],"leverage":[100],"initial":[102],"state":[103],"each":[105,142,192],"subtask,":[106],"transforming":[107],"absolute":[108],"states":[109],"into":[110],"relative":[111],"deviations":[112],"by":[113],"designed":[115,134],"operator":[116],"as":[117],"input.":[119],"approach":[121],"facilitates":[122],"reuse":[124],"subtask":[126],"domain":[127],"knowledge,":[128],"enhancing":[129],"data":[130],"efficiency.":[131],"With":[132],"structure,":[135],"establish":[137],"local":[139],"convergence":[140],"component":[143,193],"within":[144],"HLMA":[145,166],"subsequently":[147],"derive":[148],"regret":[149],"bounds":[150],"ensure":[152],"global":[153],"convergence.":[154],"Abundant":[155],"experiments":[156],"conducted":[157],"on":[158],"locomotion":[160],"navigation":[162],"tasks":[163],"demonstrate":[164],"that":[165],"surpasses":[167],"other":[168],"state-of-the-art":[169],"single-level":[170],"RL":[171],"algorithms":[174],"terms":[176],"sample":[178],"asymptotic":[181],"performance.":[182],"addition,":[184],"thorough":[185],"ablation":[186],"studies":[187],"validate":[188],"effectiveness":[190],"HLMA.":[195]},"counts_by_year":[{"year":2025,"cited_by_count":10},{"year":2024,"cited_by_count":4}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
