{"id":"https://openalex.org/W4404035314","doi":"https://doi.org/10.1109/access.2024.3491339","title":"Discovering and Exploiting Skills in Hierarchical Reinforcement Learning","display_name":"Discovering and Exploiting Skills in Hierarchical Reinforcement Learning","publication_year":2024,"publication_date":"2024-01-01","ids":{"openalex":"https://openalex.org/W4404035314","doi":"https://doi.org/10.1109/access.2024.3491339"},"language":"en","primary_location":{"id":"doi:10.1109/access.2024.3491339","is_oa":true,"landing_page_url":"https://doi.org/10.1109/access.2024.3491339","pdf_url":null,"source":{"id":"https://openalex.org/S2485537415","display_name":"IEEE Access","issn_l":"2169-3536","issn":["2169-3536"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Access","raw_type":"journal-article"},"type":"article","indexed_in":["crossref","doaj"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://doi.org/10.1109/access.2024.3491339","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5101446217","display_name":"Zhigang Huang","orcid":"https://orcid.org/0000-0002-5828-5008"},"institutions":[{"id":"https://openalex.org/I4210129465","display_name":"Wuhan Ship Development & Design Institute","ror":"https://ror.org/02mcdae06","country_code":"CN","type":"facility","lineage":["https://openalex.org/I4210129465"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Zhigang Huang","raw_affiliation_strings":["China Ship Scientific Research Center, Wuxi, China","China Ship Scientific Research Center, Wuxi, P.R.China"],"raw_orcid":"https://orcid.org/0000-0002-5828-5008","affiliations":[{"raw_affiliation_string":"China Ship Scientific Research Center, Wuxi, China","institution_ids":["https://openalex.org/I4210129465"]},{"raw_affiliation_string":"China Ship Scientific Research Center, Wuxi, P.R.China","institution_ids":["https://openalex.org/I4210129465"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":1,"corresponding_author_ids":["https://openalex.org/A5101446217"],"corresponding_institution_ids":["https://openalex.org/I4210129465"],"apc_list":{"value":1850,"currency":"USD","value_usd":1850},"apc_paid":{"value":1850,"currency":"USD","value_usd":1850},"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.16928639,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":"12","issue":null,"first_page":"163042","last_page":"163055"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.43970000743865967,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.43970000743865967,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11975","display_name":"Evolutionary Algorithms and Applications","score":0.41280001401901245,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.8021032810211182},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7649838924407959},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.4325965642929077},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.3291395902633667}],"concepts":[{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.8021032810211182},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7649838924407959},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4325965642929077},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.3291395902633667}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1109/access.2024.3491339","is_oa":true,"landing_page_url":"https://doi.org/10.1109/access.2024.3491339","pdf_url":null,"source":{"id":"https://openalex.org/S2485537415","display_name":"IEEE Access","issn_l":"2169-3536","issn":["2169-3536"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Access","raw_type":"journal-article"},{"id":"pmh:oai:doaj.org/article:ce00cb03dc184aa09f1d8c1fb9f16023","is_oa":true,"landing_page_url":"https://doaj.org/article/ce00cb03dc184aa09f1d8c1fb9f16023","pdf_url":null,"source":{"id":"https://openalex.org/S4306401280","display_name":"DOAJ (DOAJ: Directory of Open Access Journals)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by-sa","license_id":"https://openalex.org/licenses/cc-by-sa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"IEEE Access, Vol 12, Pp 163042-163055 (2024)","raw_type":"article"}],"best_oa_location":{"id":"doi:10.1109/access.2024.3491339","is_oa":true,"landing_page_url":"https://doi.org/10.1109/access.2024.3491339","pdf_url":null,"source":{"id":"https://openalex.org/S2485537415","display_name":"IEEE Access","issn_l":"2169-3536","issn":["2169-3536"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Access","raw_type":"journal-article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":49,"referenced_works":["https://openalex.org/W2272841450","https://openalex.org/W2884439071","https://openalex.org/W2899620415","https://openalex.org/W2963142324","https://openalex.org/W2963608065","https://openalex.org/W2964505566","https://openalex.org/W2997250483","https://openalex.org/W2998670262","https://openalex.org/W3004534398","https://openalex.org/W3034780381","https://openalex.org/W3034988830","https://openalex.org/W3134939669","https://openalex.org/W3205046940","https://openalex.org/W4207021923","https://openalex.org/W4225304231","https://openalex.org/W4225383724","https://openalex.org/W4297106277","https://openalex.org/W4312314355","https://openalex.org/W4312458941","https://openalex.org/W4386950658","https://openalex.org/W6703271639","https://openalex.org/W6730153900","https://openalex.org/W6734215269","https://openalex.org/W6734325300","https://openalex.org/W6734517396","https://openalex.org/W6736368053","https://openalex.org/W6745503460","https://openalex.org/W6745830540","https://openalex.org/W6747473740","https://openalex.org/W6748603076","https://openalex.org/W6752089545","https://openalex.org/W6752216738","https://openalex.org/W6753756885","https://openalex.org/W6757947852","https://openalex.org/W6758134627","https://openalex.org/W6761754428","https://openalex.org/W6764724164","https://openalex.org/W6767317771","https://openalex.org/W6772008794","https://openalex.org/W6779715229","https://openalex.org/W6779728822","https://openalex.org/W6785386017","https://openalex.org/W6785599724","https://openalex.org/W6788646031","https://openalex.org/W6790486821","https://openalex.org/W6796601913","https://openalex.org/W6797416203","https://openalex.org/W6803978876","https://openalex.org/W6853203130"],"related_works":["https://openalex.org/W2961085424","https://openalex.org/W4306674287","https://openalex.org/W3046775127","https://openalex.org/W4394896187","https://openalex.org/W3170094116","https://openalex.org/W4386462264","https://openalex.org/W3107602296","https://openalex.org/W4364306694","https://openalex.org/W4312192474","https://openalex.org/W4283697347"],"abstract_inverted_index":{"Humans":[0],"can":[1],"perform":[2],"infinite":[3],"diverse":[4],"skills.":[5,146],"These":[6,147],"skills":[7,35,51,54,65,114,149],"typically":[8],"represent":[9],"abstract":[10],"knowledge":[11],"that":[12,57],"is":[13,164],"highly":[14],"correlated":[15],"with":[16,204],"time":[17,101],"series.":[18],"To":[19],"behave":[20],"more":[21],"like":[22],"a":[23,27,70,74,122],"human,":[24],"we":[25,172],"take":[26],"long-term":[28,144],"planning":[29],"perspective":[30,184],"to":[31,52,99,106,131,153,167],"discover":[32],"and":[33,55,85,125,138,179,188,207],"exploit":[34],"(DES)":[36],"in":[37,115],"hierarchical":[38],"reinforcement":[39],"learning.":[40],"We":[41],"view":[42],"the":[43,67,80,89,94,132,136,139,151,174,183,196,209],"skill-learning":[44],"process":[45],"as":[46],"an":[47,116],"extension":[48],"of":[49,69,82,135,176,185,198,211],"primitive":[50],"advanced":[53,148],"ensure":[56],"they":[58],"have":[59],"sufficient":[60],"exploration":[61,189],"capability.":[62],"DES":[63,112,199],"discovers":[64],"at":[66,79],"level":[68,81],"trajectory":[71],"sequence":[72],"within":[73],"skill":[75,90,104,162,186],"length,":[76],"rather":[77],"than":[78],"individual":[83],"states":[84],"actions.":[86],"It":[87,119],"assigns":[88],"inference":[91,187],"loss":[92],"from":[93,155,182],"recurrent":[95],"neural":[96],"network":[97],"evenly":[98],"each":[100],"step,":[102],"maximizing":[103],"differentiation":[105],"cover":[107],"fine-grained":[108],"local":[109,156],"areas.":[110],"Furthermore,":[111],"exploits":[113],"adaptive":[117],"way.":[118],"builds":[120],"on":[121,200],"multi-step":[123],"combination,":[124],"then":[126],"makes":[127],"switching":[128],"decisions":[129],"according":[130],"relative":[133],"advantages":[134],"previous":[137],"estimated":[140],"skills,":[141],"thus":[142],"achieving":[143],"form":[145],"allow":[150],"agent":[152],"escape":[154],"areas":[157],"without":[158],"sacrificing":[159],"flexibility.":[160],"A":[161],"truncation":[163],"also":[165],"set":[166],"prevent":[168],"excessive":[169],"exploration.":[170],"Moreover,":[171],"verify":[173],"necessity":[175],"our":[177,212],"discovery":[178],"exploitation":[180],"methods":[181],"capability,":[190],"respectively.":[191],"Our":[192],"experimental":[193],"analysis":[194],"demonstrates":[195],"superiority":[197],"continuous":[201],"control":[202],"tasks":[203],"sparse":[205],"rewards":[206],"explains":[208],"benefits":[210],"methods.":[213]},"counts_by_year":[],"updated_date":"2026-05-06T08:25:59.206177","created_date":"2025-10-10T00:00:00"}
