{"id":"https://openalex.org/W3134939669","doi":"https://doi.org/10.1109/tnnls.2021.3059912","title":"Hierarchical Reinforcement Learning With Universal Policies for Multistep Robotic Manipulation","display_name":"Hierarchical Reinforcement Learning With Universal Policies for Multistep Robotic Manipulation","publication_year":2021,"publication_date":"2021-03-01","ids":{"openalex":"https://openalex.org/W3134939669","doi":"https://doi.org/10.1109/tnnls.2021.3059912","mag":"3134939669","pmid":"https://pubmed.ncbi.nlm.nih.gov/33646961"},"language":"en","primary_location":{"id":"doi:10.1109/tnnls.2021.3059912","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tnnls.2021.3059912","pdf_url":null,"source":{"id":"https://openalex.org/S4210175523","display_name":"IEEE Transactions on Neural Networks and Learning Systems","issn_l":"2162-237X","issn":["2162-237X","2162-2388"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Neural Networks and Learning Systems","raw_type":"journal-article"},"type":"article","indexed_in":["crossref","pubmed"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5063751533","display_name":"Xintong Yang","orcid":"https://orcid.org/0000-0002-7612-614X"},"institutions":[{"id":"https://openalex.org/I79510175","display_name":"Cardiff University","ror":"https://ror.org/03kk7td41","country_code":"GB","type":"education","lineage":["https://openalex.org/I79510175"]}],"countries":["GB"],"is_corresponding":true,"raw_author_name":"Xintong Yang","raw_affiliation_strings":["Center for Artificial Intelligence, Robotics and Human-Machine Systems (IROHMS), School of Engineering, Cardiff University, Cardiff, U.K"],"affiliations":[{"raw_affiliation_string":"Center for Artificial Intelligence, Robotics and Human-Machine Systems (IROHMS), School of Engineering, Cardiff University, Cardiff, U.K","institution_ids":["https://openalex.org/I79510175"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5068175770","display_name":"Ze Ji","orcid":"https://orcid.org/0000-0002-8968-9902"},"institutions":[{"id":"https://openalex.org/I79510175","display_name":"Cardiff University","ror":"https://ror.org/03kk7td41","country_code":"GB","type":"education","lineage":["https://openalex.org/I79510175"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Ze Ji","raw_affiliation_strings":["Center for Artificial Intelligence, Robotics and Human-Machine Systems (IROHMS), School of Engineering, Cardiff University, Cardiff, U.K"],"affiliations":[{"raw_affiliation_string":"Center for Artificial Intelligence, Robotics and Human-Machine Systems (IROHMS), School of Engineering, Cardiff University, Cardiff, U.K","institution_ids":["https://openalex.org/I79510175"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5013863229","display_name":"Jing Wu","orcid":"https://orcid.org/0000-0001-5123-9861"},"institutions":[{"id":"https://openalex.org/I79510175","display_name":"Cardiff University","ror":"https://ror.org/03kk7td41","country_code":"GB","type":"education","lineage":["https://openalex.org/I79510175"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Jing Wu","raw_affiliation_strings":["School of Computer Science and Informatics, Cardiff University, Cardiff, U.K"],"affiliations":[{"raw_affiliation_string":"School of Computer Science and Informatics, Cardiff University, Cardiff, U.K","institution_ids":["https://openalex.org/I79510175"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5067850699","display_name":"Yu\u2010Kun Lai","orcid":"https://orcid.org/0000-0002-2094-5680"},"institutions":[{"id":"https://openalex.org/I79510175","display_name":"Cardiff University","ror":"https://ror.org/03kk7td41","country_code":"GB","type":"education","lineage":["https://openalex.org/I79510175"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Yu-Kun Lai","raw_affiliation_strings":["School of Computer Science and Informatics, Cardiff University, Cardiff, U.K"],"affiliations":[{"raw_affiliation_string":"School of Computer Science and Informatics, Cardiff University, Cardiff, U.K","institution_ids":["https://openalex.org/I79510175"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5066950886","display_name":"Changyun Wei","orcid":"https://orcid.org/0000-0002-5788-6573"},"institutions":[{"id":"https://openalex.org/I163340411","display_name":"Hohai University","ror":"https://ror.org/01wd4xt90","country_code":"CN","type":"education","lineage":["https://openalex.org/I163340411"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Changyun Wei","raw_affiliation_strings":["Department of Robotics Engineering, Hohai University, Changzhou, China"],"affiliations":[{"raw_affiliation_string":"Department of Robotics Engineering, Hohai University, Changzhou, China","institution_ids":["https://openalex.org/I163340411"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100714422","display_name":"Guoliang Liu","orcid":"https://orcid.org/0000-0001-6614-6932"},"institutions":[{"id":"https://openalex.org/I154099455","display_name":"Shandong University","ror":"https://ror.org/0207yh398","country_code":"CN","type":"education","lineage":["https://openalex.org/I154099455"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Guoliang Liu","raw_affiliation_strings":["School of Control Science and Engineering, Shandong University, Jinan, China"],"affiliations":[{"raw_affiliation_string":"School of Control Science and Engineering, Shandong University, Jinan, China","institution_ids":["https://openalex.org/I154099455"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5006534137","display_name":"Rossitza Setchi","orcid":"https://orcid.org/0000-0002-7207-6544"},"institutions":[{"id":"https://openalex.org/I79510175","display_name":"Cardiff University","ror":"https://ror.org/03kk7td41","country_code":"GB","type":"education","lineage":["https://openalex.org/I79510175"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Rossitza Setchi","raw_affiliation_strings":["Center for Artificial Intelligence, Robotics and Human-Machine Systems (IROHMS), School of Engineering, Cardiff University, Cardiff, U.K"],"affiliations":[{"raw_affiliation_string":"Center for Artificial Intelligence, Robotics and Human-Machine Systems (IROHMS), School of Engineering, Cardiff University, Cardiff, U.K","institution_ids":["https://openalex.org/I79510175"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":7,"corresponding_author_ids":["https://openalex.org/A5063751533"],"corresponding_institution_ids":["https://openalex.org/I79510175"],"apc_list":null,"apc_paid":null,"fwci":7.979,"has_fulltext":false,"cited_by_count":87,"citation_normalized_percentile":{"value":0.97853996,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":97,"max":100},"biblio":{"volume":"33","issue":"9","first_page":"4727","last_page":"4741"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9991999864578247,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9991999864578247,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10653","display_name":"Robot Manipulation and Learning","score":0.9972000122070312,"subfield":{"id":"https://openalex.org/subfields/2207","display_name":"Control and Systems Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10586","display_name":"Robotic Path Planning Algorithms","score":0.9409999847412109,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.8578590750694275},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7492599487304688},{"id":"https://openalex.org/keywords/block","display_name":"Block (permutation group theory)","score":0.6544462442398071},{"id":"https://openalex.org/keywords/flexibility","display_name":"Flexibility (engineering)","score":0.6451995968818665},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5294318199157715},{"id":"https://openalex.org/keywords/kinematics","display_name":"Kinematics","score":0.5245031118392944},{"id":"https://openalex.org/keywords/degrees-of-freedom","display_name":"Degrees of freedom (physics and chemistry)","score":0.47275859117507935},{"id":"https://openalex.org/keywords/control","display_name":"Control (management)","score":0.4285699129104614},{"id":"https://openalex.org/keywords/robot","display_name":"Robot","score":0.4257500171661377},{"id":"https://openalex.org/keywords/robotics","display_name":"Robotics","score":0.4157615005970001},{"id":"https://openalex.org/keywords/control-engineering","display_name":"Control engineering","score":0.3200851082801819},{"id":"https://openalex.org/keywords/engineering","display_name":"Engineering","score":0.10828641057014465}],"concepts":[{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.8578590750694275},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7492599487304688},{"id":"https://openalex.org/C2777210771","wikidata":"https://www.wikidata.org/wiki/Q4927124","display_name":"Block (permutation group theory)","level":2,"score":0.6544462442398071},{"id":"https://openalex.org/C2780598303","wikidata":"https://www.wikidata.org/wiki/Q65921492","display_name":"Flexibility (engineering)","level":2,"score":0.6451995968818665},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5294318199157715},{"id":"https://openalex.org/C39920418","wikidata":"https://www.wikidata.org/wiki/Q11476","display_name":"Kinematics","level":2,"score":0.5245031118392944},{"id":"https://openalex.org/C208081375","wikidata":"https://www.wikidata.org/wiki/Q274502","display_name":"Degrees of freedom (physics and chemistry)","level":2,"score":0.47275859117507935},{"id":"https://openalex.org/C2775924081","wikidata":"https://www.wikidata.org/wiki/Q55608371","display_name":"Control (management)","level":2,"score":0.4285699129104614},{"id":"https://openalex.org/C90509273","wikidata":"https://www.wikidata.org/wiki/Q11012","display_name":"Robot","level":2,"score":0.4257500171661377},{"id":"https://openalex.org/C34413123","wikidata":"https://www.wikidata.org/wiki/Q170978","display_name":"Robotics","level":3,"score":0.4157615005970001},{"id":"https://openalex.org/C133731056","wikidata":"https://www.wikidata.org/wiki/Q4917288","display_name":"Control engineering","level":1,"score":0.3200851082801819},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.10828641057014465},{"id":"https://openalex.org/C74650414","wikidata":"https://www.wikidata.org/wiki/Q11397","display_name":"Classical mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C2524010","wikidata":"https://www.wikidata.org/wiki/Q8087","display_name":"Geometry","level":1,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.0},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C105795698","wikidata":"https://www.wikidata.org/wiki/Q12483","display_name":"Statistics","level":1,"score":0.0}],"mesh":[],"locations_count":4,"locations":[{"id":"doi:10.1109/tnnls.2021.3059912","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tnnls.2021.3059912","pdf_url":null,"source":{"id":"https://openalex.org/S4210175523","display_name":"IEEE Transactions on Neural Networks and Learning Systems","issn_l":"2162-237X","issn":["2162-237X","2162-2388"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Neural Networks and Learning Systems","raw_type":"journal-article"},{"id":"pmid:33646961","is_oa":false,"landing_page_url":"https://pubmed.ncbi.nlm.nih.gov/33646961","pdf_url":null,"source":{"id":"https://openalex.org/S4306525036","display_name":"PubMed","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1299303238","host_organization_name":"National Institutes of Health","host_organization_lineage":["https://openalex.org/I1299303238"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE transactions on neural networks and learning systems","raw_type":null},{"id":"pmh:oai:http://orca.cardiff.ac.uk:138649","is_oa":false,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S4306401195","display_name":"ORCA Online Research @Cardiff (Cardiff University)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I79510175","host_organization_name":"Cardiff University","host_organization_lineage":["https://openalex.org/I79510175"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"acceptedVersion","is_accepted":true,"is_published":false,"raw_source_name":"","raw_type":"Article"},{"id":"pmh:oai:http://orca-dev.cardiff.ac.uk:138649","is_oa":false,"landing_page_url":"https://orca.cardiff.ac.uk/id/eprint/138649/","pdf_url":null,"source":{"id":"https://openalex.org/S4306401195","display_name":"ORCA Online Research @Cardiff (Cardiff University)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I79510175","host_organization_name":"Cardiff University","host_organization_lineage":["https://openalex.org/I79510175"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"Article"}],"best_oa_location":null,"sustainable_development_goals":[{"score":0.5699999928474426,"display_name":"Peace, Justice and strong institutions","id":"https://metadata.un.org/sdg/16"}],"awards":[{"id":"https://openalex.org/G73347920","display_name":null,"funder_award_id":"201908440400","funder_id":"https://openalex.org/F4320322725","funder_display_name":"China Scholarship Council"}],"funders":[{"id":"https://openalex.org/F4320322725","display_name":"China Scholarship Council","ror":"https://ror.org/04atp4p48"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":50,"referenced_works":["https://openalex.org/W41554520","https://openalex.org/W567721252","https://openalex.org/W1494114146","https://openalex.org/W1520597402","https://openalex.org/W1521785144","https://openalex.org/W1598748993","https://openalex.org/W2109910161","https://openalex.org/W2119709400","https://openalex.org/W2121863487","https://openalex.org/W2132622533","https://openalex.org/W2144033270","https://openalex.org/W2145339207","https://openalex.org/W2173248099","https://openalex.org/W2347074400","https://openalex.org/W2468062401","https://openalex.org/W2765602917","https://openalex.org/W2787471386","https://openalex.org/W2787938642","https://openalex.org/W2791797404","https://openalex.org/W2949604932","https://openalex.org/W2963099939","https://openalex.org/W2963761387","https://openalex.org/W2964118020","https://openalex.org/W2970786335","https://openalex.org/W2970971581","https://openalex.org/W2970990801","https://openalex.org/W2995726179","https://openalex.org/W3032077725","https://openalex.org/W4293864724","https://openalex.org/W4293872189","https://openalex.org/W6616173779","https://openalex.org/W6629438869","https://openalex.org/W6679365036","https://openalex.org/W6680986811","https://openalex.org/W6684921986","https://openalex.org/W6734215269","https://openalex.org/W6735944222","https://openalex.org/W6737327832","https://openalex.org/W6740801417","https://openalex.org/W6744935223","https://openalex.org/W6748523217","https://openalex.org/W6748599296","https://openalex.org/W6748839928","https://openalex.org/W6752089545","https://openalex.org/W6759871227","https://openalex.org/W6764088478","https://openalex.org/W6764125455","https://openalex.org/W6766978945","https://openalex.org/W6769166761","https://openalex.org/W6841393079"],"related_works":["https://openalex.org/W2789522126","https://openalex.org/W2066693961","https://openalex.org/W2368363778","https://openalex.org/W122584421","https://openalex.org/W4244295168","https://openalex.org/W2753351751","https://openalex.org/W3185180338","https://openalex.org/W2889348933","https://openalex.org/W2351643838","https://openalex.org/W3033677963"],"abstract_inverted_index":{"Multistep":[0],"tasks,":[1],"such":[2,19,65],"as":[3],"block":[4],"stacking":[5,95],"or":[6],"parts":[7],"(dis)assembly,":[8],"are":[9,113],"complex":[10,66],"for":[11,18,214],"autonomous":[12],"robotic":[13,16,49],"manipulation.":[14],"A":[15],"system":[17],"tasks":[20,67,107,200,227],"would":[21],"need":[22],"to":[23,47,63,132,136,173,186],"hierarchically":[24],"combine":[25],"motion":[26,50],"control":[27,51,71,154],"at":[28,35,169,182],"a":[29,36,77,121],"lower":[30],"level":[31,172,185],"and":[32,55,70,152,178,206,208,230,232],"symbolic":[33,150],"planning":[34,69,90,151],"higher":[37],"level.":[38],"Recently,":[39],"reinforcement":[40,124],"learning":[41,105,125,145],"(RL)-based":[42],"methods":[43,59],"have":[44,60],"been":[45],"shown":[46],"handle":[48,64],"with":[52,72,201,209,233],"better":[53],"flexibility":[54],"generalizability.":[56],"However,":[57],"these":[58,117],"limited":[61],"capability":[62],"involving":[68],"many":[73],"intermediate":[74,92],"steps":[75,93],"over":[76,91],"long":[78],"time":[79],"horizon.":[80],"First,":[81],"current":[82],"RL":[83],"systems":[84],"cannot":[85],"achieve":[86],"varied":[87,138],"outcomes":[88,139],"by":[89,159],"(e.g.,":[94],"blocks":[96,202],"in":[97,140,156],"different":[98,204,210],"orders).":[99],"Second,":[100],"the":[101,134,170,175,183],"exploration":[102,166],"efficiency":[103],"of":[104,203,212],"multistep":[106,141,198,225],"is":[108],"low,":[109],"especially":[110],"when":[111],"rewards":[112],"sparse.":[114],"To":[115,143,189],"address":[116],"limitations,":[118],"we":[119,147,193],"develop":[120],"unified":[122],"hierarchical":[123],"framework,":[126],"named":[127],"Universal":[128],"Option":[129],"Framework":[130],"(UOF),":[131],"enable":[133],"agent":[135],"learn":[137],"tasks.":[142],"improve":[144],"efficiency,":[146],"train":[148],"both":[149],"kinematic":[153],"policies":[155],"parallel,":[157],"aided":[158],"two":[160],"proposed":[161],"techniques:":[162],"1)":[163],"an":[164],"auto-adjusting":[165],"strategy":[167],"(AAES)":[168],"low":[171],"stabilize":[174],"parallel":[176],"training,":[177],"2)":[179],"abstract":[180],"demonstrations":[181],"high":[184],"accelerate":[187],"convergence.":[188],"evaluate":[190],"its":[191],"performance,":[192],"performed":[194],"experiments":[195],"on":[196],"various":[197],"block-stacking":[199],"shapes":[205],"combinations":[207],"degrees":[211],"freedom":[213],"robot":[215],"control.":[216],"The":[217],"results":[218],"demonstrate":[219],"that":[220],"our":[221],"method":[222],"can":[223],"accomplish":[224],"manipulation":[226],"more":[228],"efficiently":[229],"stably,":[231],"significantly":[234],"less":[235],"memory":[236],"consumption.":[237]},"counts_by_year":[{"year":2026,"cited_by_count":6},{"year":2025,"cited_by_count":24},{"year":2024,"cited_by_count":23},{"year":2023,"cited_by_count":19},{"year":2022,"cited_by_count":11},{"year":2021,"cited_by_count":4}],"updated_date":"2026-04-12T07:58:50.170612","created_date":"2025-10-10T00:00:00"}
