{"id":"https://openalex.org/W3146696145","doi":"https://doi.org/10.1109/lra.2021.3071062","title":"Planning-Augmented Hierarchical Reinforcement Learning","display_name":"Planning-Augmented Hierarchical Reinforcement Learning","publication_year":2021,"publication_date":"2021-04-05","ids":{"openalex":"https://openalex.org/W3146696145","doi":"https://doi.org/10.1109/lra.2021.3071062","mag":"3146696145"},"language":"en","primary_location":{"id":"doi:10.1109/lra.2021.3071062","is_oa":false,"landing_page_url":"https://doi.org/10.1109/lra.2021.3071062","pdf_url":null,"source":{"id":"https://openalex.org/S4210169774","display_name":"IEEE Robotics and Automation Letters","issn_l":"2377-3766","issn":["2377-3766"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Robotics and Automation Letters","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5046661363","display_name":"Robert Gieselmann","orcid":"https://orcid.org/0000-0002-1772-7930"},"institutions":[{"id":"https://openalex.org/I86987016","display_name":"KTH Royal Institute of Technology","ror":"https://ror.org/026vcq606","country_code":"SE","type":"education","lineage":["https://openalex.org/I86987016"]}],"countries":["SE"],"is_corresponding":true,"raw_author_name":"Robert Gieselmann","raw_affiliation_strings":["RPL, EECS, KTH Royal Institute of Technology, Stockholm, Sweden"],"raw_orcid":"https://orcid.org/0000-0002-1772-7930","affiliations":[{"raw_affiliation_string":"RPL, EECS, KTH Royal Institute of Technology, Stockholm, Sweden","institution_ids":["https://openalex.org/I86987016"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5018027629","display_name":"Florian T. Pokorny","orcid":"https://orcid.org/0000-0003-1114-6040"},"institutions":[{"id":"https://openalex.org/I86987016","display_name":"KTH Royal Institute of Technology","ror":"https://ror.org/026vcq606","country_code":"SE","type":"education","lineage":["https://openalex.org/I86987016"]}],"countries":["SE"],"is_corresponding":false,"raw_author_name":"Florian T. Pokorny","raw_affiliation_strings":["RPL, EECS, KTH Royal Institute of Technology, Stockholm, Sweden"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"RPL, EECS, KTH Royal Institute of Technology, Stockholm, Sweden","institution_ids":["https://openalex.org/I86987016"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5046661363"],"corresponding_institution_ids":["https://openalex.org/I86987016"],"apc_list":null,"apc_paid":null,"fwci":1.6486,"has_fulltext":false,"cited_by_count":21,"citation_normalized_percentile":{"value":0.85650644,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":99},"biblio":{"volume":"6","issue":"3","first_page":"5097","last_page":"5104"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10586","display_name":"Robotic Path Planning Algorithms","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10586","display_name":"Robotic Path Planning Algorithms","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9995999932289124,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10906","display_name":"AI-based Problem Solving and Planning","score":0.9897000193595886,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.8789990544319153},{"id":"https://openalex.org/keywords/markov-decision-process","display_name":"Markov decision process","score":0.7512596845626831},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7322326302528381},{"id":"https://openalex.org/keywords/hierarchy","display_name":"Hierarchy","score":0.5806265473365784},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5385907888412476},{"id":"https://openalex.org/keywords/time-horizon","display_name":"Time horizon","score":0.5264530777931213},{"id":"https://openalex.org/keywords/graph","display_name":"Graph","score":0.4731800854206085},{"id":"https://openalex.org/keywords/robustness","display_name":"Robustness (evolution)","score":0.4538704752922058},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.44963735342025757},{"id":"https://openalex.org/keywords/robot","display_name":"Robot","score":0.446397602558136},{"id":"https://openalex.org/keywords/motion-planning","display_name":"Motion planning","score":0.4401954710483551},{"id":"https://openalex.org/keywords/planner","display_name":"Planner","score":0.4370328485965729},{"id":"https://openalex.org/keywords/set","display_name":"Set (abstract data type)","score":0.41561833024024963},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.37274444103240967},{"id":"https://openalex.org/keywords/markov-process","display_name":"Markov process","score":0.29338788986206055},{"id":"https://openalex.org/keywords/mathematical-optimization","display_name":"Mathematical optimization","score":0.27855145931243896},{"id":"https://openalex.org/keywords/theoretical-computer-science","display_name":"Theoretical computer science","score":0.21806025505065918},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.09583917260169983},{"id":"https://openalex.org/keywords/engineering","display_name":"Engineering","score":0.08721455931663513}],"concepts":[{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.8789990544319153},{"id":"https://openalex.org/C106189395","wikidata":"https://www.wikidata.org/wiki/Q176789","display_name":"Markov decision process","level":3,"score":0.7512596845626831},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7322326302528381},{"id":"https://openalex.org/C31170391","wikidata":"https://www.wikidata.org/wiki/Q188619","display_name":"Hierarchy","level":2,"score":0.5806265473365784},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5385907888412476},{"id":"https://openalex.org/C28761237","wikidata":"https://www.wikidata.org/wiki/Q7805321","display_name":"Time horizon","level":2,"score":0.5264530777931213},{"id":"https://openalex.org/C132525143","wikidata":"https://www.wikidata.org/wiki/Q141488","display_name":"Graph","level":2,"score":0.4731800854206085},{"id":"https://openalex.org/C63479239","wikidata":"https://www.wikidata.org/wiki/Q7353546","display_name":"Robustness (evolution)","level":3,"score":0.4538704752922058},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.44963735342025757},{"id":"https://openalex.org/C90509273","wikidata":"https://www.wikidata.org/wiki/Q11012","display_name":"Robot","level":2,"score":0.446397602558136},{"id":"https://openalex.org/C81074085","wikidata":"https://www.wikidata.org/wiki/Q366872","display_name":"Motion planning","level":3,"score":0.4401954710483551},{"id":"https://openalex.org/C2776999362","wikidata":"https://www.wikidata.org/wiki/Q2349274","display_name":"Planner","level":2,"score":0.4370328485965729},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.41561833024024963},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.37274444103240967},{"id":"https://openalex.org/C159886148","wikidata":"https://www.wikidata.org/wiki/Q176645","display_name":"Markov process","level":2,"score":0.29338788986206055},{"id":"https://openalex.org/C126255220","wikidata":"https://www.wikidata.org/wiki/Q141495","display_name":"Mathematical optimization","level":1,"score":0.27855145931243896},{"id":"https://openalex.org/C80444323","wikidata":"https://www.wikidata.org/wiki/Q2878974","display_name":"Theoretical computer science","level":1,"score":0.21806025505065918},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.09583917260169983},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.08721455931663513},{"id":"https://openalex.org/C34447519","wikidata":"https://www.wikidata.org/wiki/Q179522","display_name":"Market economy","level":1,"score":0.0},{"id":"https://openalex.org/C55493867","wikidata":"https://www.wikidata.org/wiki/Q7094","display_name":"Biochemistry","level":1,"score":0.0},{"id":"https://openalex.org/C185592680","wikidata":"https://www.wikidata.org/wiki/Q2329","display_name":"Chemistry","level":0,"score":0.0},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0},{"id":"https://openalex.org/C105795698","wikidata":"https://www.wikidata.org/wiki/Q12483","display_name":"Statistics","level":1,"score":0.0},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0},{"id":"https://openalex.org/C201995342","wikidata":"https://www.wikidata.org/wiki/Q682496","display_name":"Systems engineering","level":1,"score":0.0},{"id":"https://openalex.org/C104317684","wikidata":"https://www.wikidata.org/wiki/Q7187","display_name":"Gene","level":2,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/lra.2021.3071062","is_oa":false,"landing_page_url":"https://doi.org/10.1109/lra.2021.3071062","pdf_url":null,"source":{"id":"https://openalex.org/S4210169774","display_name":"IEEE Robotics and Automation Letters","issn_l":"2377-3766","issn":["2377-3766"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Robotics and Automation Letters","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":68,"referenced_works":["https://openalex.org/W41554520","https://openalex.org/W131069610","https://openalex.org/W1424654272","https://openalex.org/W1542595278","https://openalex.org/W1594201624","https://openalex.org/W2121863487","https://openalex.org/W2169528473","https://openalex.org/W2173248099","https://openalex.org/W2257979135","https://openalex.org/W2593841437","https://openalex.org/W2599174273","https://openalex.org/W2803281228","https://openalex.org/W2804010078","https://openalex.org/W2804672169","https://openalex.org/W2904177032","https://openalex.org/W2914584948","https://openalex.org/W2950614095","https://openalex.org/W2962767126","https://openalex.org/W2962917939","https://openalex.org/W2963099939","https://openalex.org/W2963245725","https://openalex.org/W2963262099","https://openalex.org/W2963423916","https://openalex.org/W2963619650","https://openalex.org/W2963645756","https://openalex.org/W2963864421","https://openalex.org/W2964001908","https://openalex.org/W2964077562","https://openalex.org/W2964238766","https://openalex.org/W2967293465","https://openalex.org/W2970387978","https://openalex.org/W2970720334","https://openalex.org/W2982437637","https://openalex.org/W3010717779","https://openalex.org/W3021208093","https://openalex.org/W3029907520","https://openalex.org/W3032223429","https://openalex.org/W3034888459","https://openalex.org/W3104216473","https://openalex.org/W3108859470","https://openalex.org/W3128467746","https://openalex.org/W3131655564","https://openalex.org/W4288021424","https://openalex.org/W4288089349","https://openalex.org/W4288331462","https://openalex.org/W4294225490","https://openalex.org/W4295352814","https://openalex.org/W4298876402","https://openalex.org/W4300799055","https://openalex.org/W6605295560","https://openalex.org/W6635701881","https://openalex.org/W6683300800","https://openalex.org/W6684921986","https://openalex.org/W6703271639","https://openalex.org/W6740801417","https://openalex.org/W6748848655","https://openalex.org/W6751720535","https://openalex.org/W6752089545","https://openalex.org/W6752217255","https://openalex.org/W6752446230","https://openalex.org/W6764173040","https://openalex.org/W6766413688","https://openalex.org/W6767649332","https://openalex.org/W6769673253","https://openalex.org/W6770044267","https://openalex.org/W6772121735","https://openalex.org/W6772264461","https://openalex.org/W6775063032"],"related_works":["https://openalex.org/W3096874164","https://openalex.org/W2937181779","https://openalex.org/W2386410636","https://openalex.org/W1985560493","https://openalex.org/W2357975469","https://openalex.org/W2145363145","https://openalex.org/W1626977535","https://openalex.org/W4284974072","https://openalex.org/W2341346307","https://openalex.org/W4225269853"],"abstract_inverted_index":{"Planning":[0],"algorithms":[1],"are":[2,14,146],"powerful":[3],"at":[4,169],"solving":[5],"long-horizon":[6,53],"decision-making":[7],"problems":[8,116,189],"but":[9],"require":[10],"that":[11],"environment":[12],"dynamics":[13],"known.":[15],"Model-free":[16],"reinforcement":[17],"learning":[18],"has":[19],"recently":[20],"been":[21],"merged":[22],"with":[23,117],"graph-based":[24],"planning":[25,42],"to":[26,40,45,61,114],"increase":[27],"the":[28,50,66,72,109,127,154,170,174,198],"robustness":[29],"of":[30,68,82,111,140,153,173,178,186,200],"trained":[31,147],"policies":[32],"in":[33,43,52,65,148,190],"state-space":[34],"navigation":[35],"problems.":[36],"Recent":[37],"ideas":[38],"suggest":[39],"use":[41],"order":[44],"provide":[46],"intermediate":[47],"waypoints":[48],"guiding":[49],"policy":[51],"tasks.":[54],"Yet,":[55],"it":[56],"is":[57,74,181],"not":[58],"always":[59],"practical":[60],"describe":[62],"a":[63,98,122,132,138,158,165,184,201],"problem":[64],"setting":[67],"state-to-state":[69],"navigation.":[70],"Often,":[71],"goal":[73],"defined":[75,119],"by":[76],"one":[77],"or":[78,85],"multiple":[79],"disjoint":[80],"sets":[81],"valid":[83],"states":[84],"implicitly":[86,118],"using":[87],"an":[88],"abstract":[89],"task":[90],"description.":[91],"Building":[92],"upon":[93],"previous":[94],"efforts,":[95],"we":[96,125],"introduce":[97],"novel":[99],"algorithm":[100],"called":[101],"Planning-Augmented":[102],"Hierarchical":[103],"Reinforcement":[104],"Learning":[105],"(PAHRL)":[106],"which":[107],"translates":[108],"concept":[110],"hybrid":[112],"planning/RL":[113],"such":[115],"goal.":[120],"Using":[121],"hierarchical":[123],"framework,":[124],"divide":[126],"original":[128],"task,":[129],"formulated":[130],"as":[131],"Markov":[133],"Decision":[134],"Process":[135],"(MDP),":[136],"into":[137],"hierarchy":[139],"shorter":[141],"horizon":[142],"MDPs.":[143],"Actor-critic":[144],"agents":[145],"parallel":[149],"for":[150,183],"each":[151],"level":[152,172],"hierarchy.":[155,175],"During":[156],"testing,":[157],"planner":[159],"then":[160],"determines":[161],"useful":[162],"subgoals":[163],"on":[164],"state":[166],"graph":[167],"constructed":[168],"bottom":[171],"The":[176],"effectiveness":[177],"our":[179],"approach":[180],"demonstrated":[182],"set":[185],"continuous":[187],"control":[188],"simulation":[191],"including":[192],"robot":[193],"arm":[194],"reaching":[195],"tasks":[196],"and":[197],"manipulation":[199],"deformable":[202],"object.":[203]},"counts_by_year":[{"year":2026,"cited_by_count":3},{"year":2025,"cited_by_count":1},{"year":2024,"cited_by_count":8},{"year":2023,"cited_by_count":5},{"year":2022,"cited_by_count":3},{"year":2021,"cited_by_count":1}],"updated_date":"2026-05-03T08:25:01.440150","created_date":"2025-10-10T00:00:00"}
