{"id":"https://openalex.org/W4385764190","doi":"https://doi.org/10.24963/ijcai.2023/391","title":"Ensemble Reinforcement Learning in Continuous Spaces -- A Hierarchical Multi-Step Approach for Policy Training","display_name":"Ensemble Reinforcement Learning in Continuous Spaces -- A Hierarchical Multi-Step Approach for Policy Training","publication_year":2023,"publication_date":"2023-08-01","ids":{"openalex":"https://openalex.org/W4385764190","doi":"https://doi.org/10.24963/ijcai.2023/391"},"language":"en","primary_location":{"id":"doi:10.24963/ijcai.2023/391","is_oa":true,"landing_page_url":"http://dx.doi.org/10.24963/ijcai.2023/391","pdf_url":"https://www.ijcai.org/proceedings/2023/0391.pdf","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the Thirty-Second International Joint Conference on Artificial Intelligence","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://www.ijcai.org/proceedings/2023/0391.pdf","any_repository_has_fulltext":null},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5100389298","display_name":"Gang Chen","orcid":"https://orcid.org/0000-0002-9597-497X"},"institutions":[{"id":"https://openalex.org/I41156924","display_name":"Victoria University of Wellington","ror":"https://ror.org/0040r6f76","country_code":"NZ","type":"education","lineage":["https://openalex.org/I41156924"]}],"countries":["NZ"],"is_corresponding":false,"raw_author_name":"Gang Chen","raw_affiliation_strings":["Victoria University of Wellington","School of Engineering and Computer Science, Victoria University of Wellington, New Zealand"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Victoria University of Wellington","institution_ids":["https://openalex.org/I41156924"]},{"raw_affiliation_string":"School of Engineering and Computer Science, Victoria University of Wellington, New Zealand","institution_ids":["https://openalex.org/I41156924"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5040143684","display_name":"Victoria Huang","orcid":"https://orcid.org/0000-0003-4037-9898"},"institutions":[{"id":"https://openalex.org/I45935490","display_name":"National Institute of Water and Atmospheric Research","ror":"https://ror.org/04hxcaz34","country_code":"NZ","type":"facility","lineage":["https://openalex.org/I45935490"]}],"countries":["NZ"],"is_corresponding":true,"raw_author_name":"Victoria Huang","raw_affiliation_strings":["National Institute of Water and Atmospheric Research","National Institute of Water and Atmospheric Research, New Zealand"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"National Institute of Water and Atmospheric Research","institution_ids":["https://openalex.org/I45935490"]},{"raw_affiliation_string":"National Institute of Water and Atmospheric Research, New Zealand","institution_ids":["https://openalex.org/I45935490"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5040143684"],"corresponding_institution_ids":["https://openalex.org/I45935490"],"apc_list":null,"apc_paid":null,"fwci":0.1657,"has_fulltext":true,"cited_by_count":1,"citation_normalized_percentile":{"value":0.55261251,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":94},"biblio":{"volume":null,"issue":null,"first_page":"3514","last_page":"3522"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12794","display_name":"Adaptive Dynamic Programming Control","score":0.9750000238418579,"subfield":{"id":"https://openalex.org/subfields/1703","display_name":"Computational Theory and Mathematics"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11099","display_name":"Autonomous Vehicle Technology and Safety","score":0.9276000261306763,"subfield":{"id":"https://openalex.org/subfields/2203","display_name":"Automotive Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.8747267723083496},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7808928489685059},{"id":"https://openalex.org/keywords/benchmark","display_name":"Benchmark (surveying)","score":0.7706871628761292},{"id":"https://openalex.org/keywords/ensemble-learning","display_name":"Ensemble learning","score":0.6943496465682983},{"id":"https://openalex.org/keywords/stability","display_name":"Stability (learning theory)","score":0.65789395570755},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5919992327690125},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.5365251302719116},{"id":"https://openalex.org/keywords/base","display_name":"Base (topology)","score":0.48069244623184204},{"id":"https://openalex.org/keywords/process","display_name":"Process (computing)","score":0.4741777181625366},{"id":"https://openalex.org/keywords/state","display_name":"State (computer science)","score":0.45156362652778625},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.2262401282787323},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.08447948098182678}],"concepts":[{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.8747267723083496},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7808928489685059},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.7706871628761292},{"id":"https://openalex.org/C45942800","wikidata":"https://www.wikidata.org/wiki/Q245652","display_name":"Ensemble learning","level":2,"score":0.6943496465682983},{"id":"https://openalex.org/C112972136","wikidata":"https://www.wikidata.org/wiki/Q7595718","display_name":"Stability (learning theory)","level":2,"score":0.65789395570755},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5919992327690125},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.5365251302719116},{"id":"https://openalex.org/C42058472","wikidata":"https://www.wikidata.org/wiki/Q810214","display_name":"Base (topology)","level":2,"score":0.48069244623184204},{"id":"https://openalex.org/C98045186","wikidata":"https://www.wikidata.org/wiki/Q205663","display_name":"Process (computing)","level":2,"score":0.4741777181625366},{"id":"https://openalex.org/C48103436","wikidata":"https://www.wikidata.org/wiki/Q599031","display_name":"State (computer science)","level":2,"score":0.45156362652778625},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.2262401282787323},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.08447948098182678},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0},{"id":"https://openalex.org/C13280743","wikidata":"https://www.wikidata.org/wiki/Q131089","display_name":"Geodesy","level":1,"score":0.0},{"id":"https://openalex.org/C205649164","wikidata":"https://www.wikidata.org/wiki/Q1071","display_name":"Geography","level":0,"score":0.0},{"id":"https://openalex.org/C134306372","wikidata":"https://www.wikidata.org/wiki/Q7754","display_name":"Mathematical analysis","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.24963/ijcai.2023/391","is_oa":true,"landing_page_url":"http://dx.doi.org/10.24963/ijcai.2023/391","pdf_url":"https://www.ijcai.org/proceedings/2023/0391.pdf","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the Thirty-Second International Joint Conference on Artificial Intelligence","raw_type":"proceedings-article"}],"best_oa_location":{"id":"doi:10.24963/ijcai.2023/391","is_oa":true,"landing_page_url":"http://dx.doi.org/10.24963/ijcai.2023/391","pdf_url":"https://www.ijcai.org/proceedings/2023/0391.pdf","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the Thirty-Second International Joint Conference on Artificial Intelligence","raw_type":"proceedings-article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":true,"grobid_xml":false},"content_urls":{"pdf":"https://content.openalex.org/works/W4385764190.pdf"},"referenced_works_count":48,"referenced_works":["https://openalex.org/W1646152356","https://openalex.org/W2043806097","https://openalex.org/W2155968351","https://openalex.org/W2260756217","https://openalex.org/W2280163991","https://openalex.org/W2419612459","https://openalex.org/W2470693974","https://openalex.org/W2590773562","https://openalex.org/W2623491082","https://openalex.org/W2736601468","https://openalex.org/W2746553466","https://openalex.org/W2749928749","https://openalex.org/W2767313115","https://openalex.org/W2778821583","https://openalex.org/W2781726626","https://openalex.org/W2785389871","https://openalex.org/W2787938642","https://openalex.org/W2796447411","https://openalex.org/W2798705390","https://openalex.org/W2807588596","https://openalex.org/W2891797170","https://openalex.org/W2963238274","https://openalex.org/W2963276097","https://openalex.org/W2963864421","https://openalex.org/W2964043796","https://openalex.org/W2972166034","https://openalex.org/W2981668237","https://openalex.org/W2994722919","https://openalex.org/W2998050631","https://openalex.org/W3005089802","https://openalex.org/W3015707918","https://openalex.org/W3034225898","https://openalex.org/W3041764008","https://openalex.org/W3084269620","https://openalex.org/W3086019649","https://openalex.org/W3126321819","https://openalex.org/W3127556163","https://openalex.org/W3172115140","https://openalex.org/W3203827806","https://openalex.org/W4287714048","https://openalex.org/W4302570325","https://openalex.org/W4308170110","https://openalex.org/W4394666657","https://openalex.org/W6684205842","https://openalex.org/W6783140480","https://openalex.org/W6863631769","https://openalex.org/W6863994431","https://openalex.org/W6864014924"],"related_works":["https://openalex.org/W2378211422","https://openalex.org/W4321353415","https://openalex.org/W2745001401","https://openalex.org/W2130974462","https://openalex.org/W2028665553","https://openalex.org/W2086519370","https://openalex.org/W4246352526","https://openalex.org/W2121910908","https://openalex.org/W915438175","https://openalex.org/W85029034"],"abstract_inverted_index":{"Actor-critic":[0],"deep":[1],"reinforcement":[2,15],"learning":[3,16,43,49,71,126],"(DRL)":[4],"algorithms":[5,37,60,78,161],"have":[6,61],"recently":[7],"achieved":[8],"prominent":[9],"success":[10],"in":[11,47],"tackling":[12],"various":[13],"challenging":[14],"(RL)":[17],"problems,":[18],"particularly":[19],"complex":[20],"control":[21],"tasks":[22],"with":[23],"high-dimensional":[24],"continuous":[25],"state":[26],"and":[27,51,68],"action":[28],"spaces.":[29],"Nevertheless,":[30],"existing":[31,76],"research":[32],"showed":[33],"that":[34,131],"actor-critic":[35],"DRL":[36,59,130,160],"often":[38],"failed":[39],"to":[40,65,102,121,156],"explore":[41],"their":[42],"environments":[44],"effectively,":[45],"resulting":[46],"limited":[48],"stability":[50],"performance.":[52],"To":[53],"address":[54],"this":[55,95],"limitation,":[56],"several":[57,158],"ensemble":[58,77,105,129],"been":[62],"proposed":[63],"lately":[64],"boost":[66],"exploration":[67],"stabilize":[69],"the":[70,89,92],"process.":[72],"However,":[73],"most":[74],"of":[75,91,106,143],"do":[79],"not":[80],"explicitly":[81],"train":[82,103],"all":[83],"base":[84,107],"learners":[85,108],"towards":[86],"jointly":[87],"optimizing":[88],"performance":[90],"ensemble.":[93],"In":[94],"paper,":[96],"we":[97],"propose":[98],"a":[99,123],"new":[100,124,145],"technique":[101,118],"an":[104,111],"based":[109],"on":[110,162],"innovative":[112],"multi-step":[113],"integration":[114],"method.":[115],"This":[116],"training":[117],"enables":[119],"us":[120],"develop":[122],"hierarchical":[125],"algorithm":[127,146,151],"for":[128],"effectively":[132],"promotes":[133],"inter-learner":[134,138],"collaboration":[135],"through":[136],"stable":[137],"parameter":[139],"sharing.":[140],"The":[141,150],"design":[142],"our":[144],"is":[147,152],"verified":[148],"theoretically.":[149],"also":[153],"shown":[154],"empirically":[155],"outperform":[157],"state-of-the-art":[159],"multiple":[163],"benchmark":[164],"RL":[165],"problems.":[166]},"counts_by_year":[{"year":2023,"cited_by_count":1}],"updated_date":"2026-05-21T06:26:12.895304","created_date":"2025-10-10T00:00:00"}
