{"id":"https://openalex.org/W3174733757","doi":"https://doi.org/10.1109/tnnls.2021.3087733","title":"End-to-End Hierarchical Reinforcement Learning With Integrated Subgoal Discovery","display_name":"End-to-End Hierarchical Reinforcement Learning With Integrated Subgoal Discovery","publication_year":2021,"publication_date":"2021-06-22","ids":{"openalex":"https://openalex.org/W3174733757","doi":"https://doi.org/10.1109/tnnls.2021.3087733","mag":"3174733757","pmid":"https://pubmed.ncbi.nlm.nih.gov/34156954"},"language":"en","primary_location":{"id":"doi:10.1109/tnnls.2021.3087733","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tnnls.2021.3087733","pdf_url":null,"source":{"id":"https://openalex.org/S4210175523","display_name":"IEEE Transactions on Neural Networks and Learning Systems","issn_l":"2162-237X","issn":["2162-237X","2162-2388"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Neural Networks and Learning Systems","raw_type":"journal-article"},"type":"article","indexed_in":["crossref","pubmed"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5081835375","display_name":"Shubham Pateria","orcid":"https://orcid.org/0000-0002-6507-4479"},"institutions":[{"id":"https://openalex.org/I172675005","display_name":"Nanyang Technological University","ror":"https://ror.org/02e7b5302","country_code":"SG","type":"education","lineage":["https://openalex.org/I172675005"]}],"countries":["SG"],"is_corresponding":true,"raw_author_name":"Shubham Pateria","raw_affiliation_strings":["School of Computer Science and Engineering, Nanyang Technological University, Singapore"],"raw_orcid":"https://orcid.org/0000-0002-6507-4479","affiliations":[{"raw_affiliation_string":"School of Computer Science and Engineering, Nanyang Technological University, Singapore","institution_ids":["https://openalex.org/I172675005"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5062773086","display_name":"Budhitama Subagdja","orcid":"https://orcid.org/0000-0001-9774-0264"},"institutions":[{"id":"https://openalex.org/I79891267","display_name":"Singapore Management University","ror":"https://ror.org/050qmg959","country_code":"SG","type":"education","lineage":["https://openalex.org/I79891267"]}],"countries":["SG"],"is_corresponding":false,"raw_author_name":"Budhitama Subagdja","raw_affiliation_strings":["School of Computing and Information Systems, Singapore Management University, Singapore"],"raw_orcid":"https://orcid.org/0000-0001-9774-0264","affiliations":[{"raw_affiliation_string":"School of Computing and Information Systems, Singapore Management University, Singapore","institution_ids":["https://openalex.org/I79891267"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5004167801","display_name":"Ah\u2010Hwee Tan","orcid":"https://orcid.org/0000-0003-0378-4069"},"institutions":[{"id":"https://openalex.org/I79891267","display_name":"Singapore Management University","ror":"https://ror.org/050qmg959","country_code":"SG","type":"education","lineage":["https://openalex.org/I79891267"]}],"countries":["SG"],"is_corresponding":false,"raw_author_name":"Ah-Hwee Tan","raw_affiliation_strings":["School of Computing and Information Systems, Singapore Management University, Singapore"],"raw_orcid":"https://orcid.org/0000-0003-0378-4069","affiliations":[{"raw_affiliation_string":"School of Computing and Information Systems, Singapore Management University, Singapore","institution_ids":["https://openalex.org/I79891267"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5063229067","display_name":"Chai Quek","orcid":"https://orcid.org/0000-0002-7313-4339"},"institutions":[{"id":"https://openalex.org/I172675005","display_name":"Nanyang Technological University","ror":"https://ror.org/02e7b5302","country_code":"SG","type":"education","lineage":["https://openalex.org/I172675005"]}],"countries":["SG"],"is_corresponding":false,"raw_author_name":"Chai Quek","raw_affiliation_strings":["School of Computer Science and Engineering, Nanyang Technological University, Singapore"],"raw_orcid":"https://orcid.org/0000-0002-7313-4339","affiliations":[{"raw_affiliation_string":"School of Computer Science and Engineering, Nanyang Technological University, Singapore","institution_ids":["https://openalex.org/I172675005"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5081835375"],"corresponding_institution_ids":["https://openalex.org/I172675005"],"apc_list":null,"apc_paid":null,"fwci":3.9195,"has_fulltext":false,"cited_by_count":37,"citation_normalized_percentile":{"value":0.94552701,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":98,"max":100},"biblio":{"volume":"33","issue":"12","first_page":"7778","last_page":"7790"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9995999932289124,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9995999932289124,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10260","display_name":"Software Engineering Research","score":0.935699999332428,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12794","display_name":"Adaptive Dynamic Programming Control","score":0.9297000169754028,"subfield":{"id":"https://openalex.org/subfields/1703","display_name":"Computational Theory and Mathematics"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.8306833505630493},{"id":"https://openalex.org/keywords/hierarchy","display_name":"Hierarchy","score":0.785245418548584},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6642119884490967},{"id":"https://openalex.org/keywords/heuristic","display_name":"Heuristic","score":0.6622627377510071},{"id":"https://openalex.org/keywords/state-space","display_name":"State space","score":0.6115704774856567},{"id":"https://openalex.org/keywords/space","display_name":"Space (punctuation)","score":0.5829029083251953},{"id":"https://openalex.org/keywords/set","display_name":"Set (abstract data type)","score":0.5724291801452637},{"id":"https://openalex.org/keywords/domain","display_name":"Domain (mathematical analysis)","score":0.5637609958648682},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.499248743057251},{"id":"https://openalex.org/keywords/state","display_name":"State (computer science)","score":0.45582354068756104},{"id":"https://openalex.org/keywords/salient","display_name":"Salient","score":0.439683198928833},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.37779754400253296},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.1483897566795349},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.10664013028144836}],"concepts":[{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.8306833505630493},{"id":"https://openalex.org/C31170391","wikidata":"https://www.wikidata.org/wiki/Q188619","display_name":"Hierarchy","level":2,"score":0.785245418548584},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6642119884490967},{"id":"https://openalex.org/C173801870","wikidata":"https://www.wikidata.org/wiki/Q201413","display_name":"Heuristic","level":2,"score":0.6622627377510071},{"id":"https://openalex.org/C72434380","wikidata":"https://www.wikidata.org/wiki/Q230930","display_name":"State space","level":2,"score":0.6115704774856567},{"id":"https://openalex.org/C2778572836","wikidata":"https://www.wikidata.org/wiki/Q380933","display_name":"Space (punctuation)","level":2,"score":0.5829029083251953},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.5724291801452637},{"id":"https://openalex.org/C36503486","wikidata":"https://www.wikidata.org/wiki/Q11235244","display_name":"Domain (mathematical analysis)","level":2,"score":0.5637609958648682},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.499248743057251},{"id":"https://openalex.org/C48103436","wikidata":"https://www.wikidata.org/wiki/Q599031","display_name":"State (computer science)","level":2,"score":0.45582354068756104},{"id":"https://openalex.org/C2780719617","wikidata":"https://www.wikidata.org/wiki/Q1030752","display_name":"Salient","level":2,"score":0.439683198928833},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.37779754400253296},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.1483897566795349},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.10664013028144836},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0},{"id":"https://openalex.org/C34447519","wikidata":"https://www.wikidata.org/wiki/Q179522","display_name":"Market economy","level":1,"score":0.0},{"id":"https://openalex.org/C134306372","wikidata":"https://www.wikidata.org/wiki/Q7754","display_name":"Mathematical analysis","level":1,"score":0.0},{"id":"https://openalex.org/C105795698","wikidata":"https://www.wikidata.org/wiki/Q12483","display_name":"Statistics","level":1,"score":0.0},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0}],"mesh":[{"descriptor_ui":"D000077321","descriptor_name":"Deep Learning","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D000077321","descriptor_name":"Deep Learning","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D000077321","descriptor_name":"Deep Learning","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D007858","descriptor_name":"Learning","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D007858","descriptor_name":"Learning","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D007858","descriptor_name":"Learning","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D011336","descriptor_name":"Probability","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D011336","descriptor_name":"Probability","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D011336","descriptor_name":"Probability","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D012054","descriptor_name":"Reinforcement, Psychology","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D012054","descriptor_name":"Reinforcement, Psychology","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D012054","descriptor_name":"Reinforcement, Psychology","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D016571","descriptor_name":"Neural Networks, Computer","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D016571","descriptor_name":"Neural Networks, Computer","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D016571","descriptor_name":"Neural Networks, Computer","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true}],"locations_count":3,"locations":[{"id":"doi:10.1109/tnnls.2021.3087733","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tnnls.2021.3087733","pdf_url":null,"source":{"id":"https://openalex.org/S4210175523","display_name":"IEEE Transactions on Neural Networks and Learning Systems","issn_l":"2162-237X","issn":["2162-237X","2162-2388"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Neural Networks and Learning Systems","raw_type":"journal-article"},{"id":"pmid:34156954","is_oa":false,"landing_page_url":"https://pubmed.ncbi.nlm.nih.gov/34156954","pdf_url":null,"source":{"id":"https://openalex.org/S4306525036","display_name":"PubMed","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1299303238","host_organization_name":"National Institutes of Health","host_organization_lineage":["https://openalex.org/I1299303238"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE transactions on neural networks and learning systems","raw_type":null},{"id":"pmh:oai:ink.library.smu.edu.sg:sis_research-7419","is_oa":false,"landing_page_url":"https://ink.library.smu.edu.sg/sis_research/6416","pdf_url":null,"source":{"id":"https://openalex.org/S4377196871","display_name":"Institutional Knowledge (InK) - Institutional Knowledge at Singapore Management University (Singapore Management University)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I79891267","host_organization_name":"Singapore Management University","host_organization_lineage":["https://openalex.org/I79891267"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"https://doi.org/10.1109/tnnls.2021.3087733","raw_type":"Journal Article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G478423709","display_name":null,"funder_award_id":"AISG2-RP-2020-019","funder_id":"https://openalex.org/F4320320709","funder_display_name":"National Research Foundation Singapore"}],"funders":[{"id":"https://openalex.org/F4320320709","display_name":"National Research Foundation Singapore","ror":"https://ror.org/03cpyc314"}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":47,"referenced_works":["https://openalex.org/W92634156","https://openalex.org/W567721252","https://openalex.org/W1536990779","https://openalex.org/W1592847719","https://openalex.org/W2007098590","https://openalex.org/W2041141995","https://openalex.org/W2090170171","https://openalex.org/W2109910161","https://openalex.org/W2145339207","https://openalex.org/W2158782408","https://openalex.org/W2160371091","https://openalex.org/W2160808139","https://openalex.org/W2168640731","https://openalex.org/W2342662072","https://openalex.org/W2523124567","https://openalex.org/W2528237000","https://openalex.org/W2550612212","https://openalex.org/W2775245461","https://openalex.org/W2791797404","https://openalex.org/W2890648518","https://openalex.org/W2920215304","https://openalex.org/W2949267040","https://openalex.org/W2950040888","https://openalex.org/W2950614095","https://openalex.org/W2963276097","https://openalex.org/W2963761387","https://openalex.org/W2963864421","https://openalex.org/W3036329728","https://openalex.org/W3037399823","https://openalex.org/W4288331462","https://openalex.org/W4294225490","https://openalex.org/W4300799055","https://openalex.org/W6603722601","https://openalex.org/W6616173779","https://openalex.org/W6683821272","https://openalex.org/W6684921986","https://openalex.org/W6704571135","https://openalex.org/W6717230150","https://openalex.org/W6729730268","https://openalex.org/W6734215269","https://openalex.org/W6734325300","https://openalex.org/W6740801417","https://openalex.org/W6748848655","https://openalex.org/W6752089545","https://openalex.org/W6759871227","https://openalex.org/W6764173040","https://openalex.org/W6779684978"],"related_works":["https://openalex.org/W2329500892","https://openalex.org/W2976657239","https://openalex.org/W4225571923","https://openalex.org/W3212257828","https://openalex.org/W2999580272","https://openalex.org/W4297873223","https://openalex.org/W2350784623","https://openalex.org/W2126211886","https://openalex.org/W3009457412","https://openalex.org/W2992629954"],"abstract_inverted_index":{"Hierarchical":[0],"reinforcement":[1],"learning":[2,72],"(HRL)":[3],"is":[4,83],"a":[5,20,34,67,74,120,137,152],"promising":[6],"approach":[7],"to":[8,41,60,130],"perform":[9],"long-horizon":[10],"goal-reaching":[11],"tasks":[12,142],"by":[13,52,112],"decomposing":[14],"the":[15,43,54,58,63,80,105,109,116,131,144,172],"goals":[16],"into":[17],"subgoals.":[18],"In":[19],"holistic":[21],"HRL":[22,48,95,155],"paradigm,":[23],"an":[24,93,98],"agent":[25],"must":[26],"autonomously":[27],"discover":[28],"such":[29,73],"subgoals":[30,65,91,117],"and":[31],"also":[32],"learn":[33],"hierarchy":[35,59],"of":[36,89,108,123,139,171],"policies":[37],"that":[38,103,118,160],"uses":[39],"them":[40],"reach":[42],"goals.":[44],"Recently":[45],"introduced":[46],"end-to-end":[47,94,154],"methods":[49],"accomplish":[50],"this":[51],"using":[53],"higher-level":[55,110],"policy":[56,75],"in":[57,66,143,169],"directly":[61],"search":[62,106],"useful":[64],"continuous":[68,140],"subgoal":[69,81,100],"space.":[70],"However,":[71],"may":[76],"be":[77],"challenging":[78],"when":[79],"space":[82,107],"large.":[84],"We":[85,133],"propose":[86],"integrated":[87,99],"discovery":[88,101],"salient":[90],"(LIDOSS),":[92],"method":[96],"with":[97],"heuristic":[102],"reduces":[104],"policy,":[111],"explicitly":[113],"focusing":[114],"on":[115,125,136],"have":[119],"greater":[121],"probability":[122],"occurrence":[124],"various":[126],"state-transition":[127],"trajectories":[128],"leading":[129],"goal.":[132],"evaluate":[134],"LIDOSS":[135,161],"set":[138],"control":[141],"MuJoCo":[145],"domain":[146],"against":[147],"hierarchical":[148],"actor":[149],"critic":[150],"(HAC),":[151],"state-of-the-art":[153],"method.":[156],"The":[157],"results":[158],"show":[159],"attains":[162],"better":[163],"goal":[164],"achievement":[165],"rates":[166],"than":[167],"HAC":[168],"most":[170],"tasks.":[173]},"counts_by_year":[{"year":2025,"cited_by_count":9},{"year":2024,"cited_by_count":15},{"year":2023,"cited_by_count":7},{"year":2022,"cited_by_count":6}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
