{"id":"https://openalex.org/W4409590668","doi":"https://doi.org/10.1007/s40747-025-01895-9","title":"Hierarchical reinforcement learning based on macro actions","display_name":"Hierarchical reinforcement learning based on macro actions","publication_year":2025,"publication_date":"2025-04-19","ids":{"openalex":"https://openalex.org/W4409590668","doi":"https://doi.org/10.1007/s40747-025-01895-9"},"language":"en","primary_location":{"id":"doi:10.1007/s40747-025-01895-9","is_oa":true,"landing_page_url":"https://doi.org/10.1007/s40747-025-01895-9","pdf_url":"https://link.springer.com/content/pdf/10.1007/s40747-025-01895-9.pdf","source":{"id":"https://openalex.org/S3035462843","display_name":"Complex & Intelligent Systems","issn_l":"2198-6053","issn":["2198-6053","2199-4536"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310319900","host_organization_name":"Springer Science+Business Media","host_organization_lineage":["https://openalex.org/P4310319900","https://openalex.org/P4310319965"],"host_organization_lineage_names":["Springer Science+Business Media","Springer Nature"],"type":"journal"},"license":"cc-by-nc-nd","license_id":"https://openalex.org/licenses/cc-by-nc-nd","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Complex &amp; Intelligent Systems","raw_type":"journal-article"},"type":"article","indexed_in":["crossref","doaj"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://link.springer.com/content/pdf/10.1007/s40747-025-01895-9.pdf","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5101763478","display_name":"Hao Jiang","orcid":"https://orcid.org/0000-0002-8304-3692"},"institutions":[{"id":"https://openalex.org/I2802541053","display_name":"Academy of Military Medical Sciences","ror":"https://ror.org/02bv3c993","country_code":"CN","type":"facility","lineage":["https://openalex.org/I2802541053"]},{"id":"https://openalex.org/I4210160531","display_name":"Chinese People's Liberation Army","ror":"https://ror.org/05tf9r976","country_code":"CN","type":"funder","lineage":["https://openalex.org/I4210160531"]},{"id":"https://openalex.org/I4210158522","display_name":"PLA Academy of Military Science","ror":"https://ror.org/05ct4s596","country_code":"CN","type":"facility","lineage":["https://openalex.org/I4210158522"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Hao Jiang","raw_affiliation_strings":["Chinese Academy of Military Science, Beijing, 100000, China"],"affiliations":[{"raw_affiliation_string":"Chinese Academy of Military Science, Beijing, 100000, China","institution_ids":["https://openalex.org/I4210160531","https://openalex.org/I4210158522","https://openalex.org/I2802541053"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5031404939","display_name":"Gongju Wang","orcid":"https://orcid.org/0000-0002-7162-3067"},"institutions":[{"id":"https://openalex.org/I6507939","display_name":"China United Network Communications Group (China)","ror":"https://ror.org/028w99c90","country_code":"CN","type":"company","lineage":["https://openalex.org/I6507939"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Gongju Wang","raw_affiliation_strings":["Data Intelligence Division, China Unicom Digital Technology Co, Beijing, 100032, China"],"affiliations":[{"raw_affiliation_string":"Data Intelligence Division, China Unicom Digital Technology Co, Beijing, 100032, China","institution_ids":["https://openalex.org/I6507939"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5000067683","display_name":"Shengze Li","orcid":"https://orcid.org/0000-0003-2160-8675"},"institutions":[{"id":"https://openalex.org/I2802541053","display_name":"Academy of Military Medical Sciences","ror":"https://ror.org/02bv3c993","country_code":"CN","type":"facility","lineage":["https://openalex.org/I2802541053"]},{"id":"https://openalex.org/I4210160531","display_name":"Chinese People's Liberation Army","ror":"https://ror.org/05tf9r976","country_code":"CN","type":"funder","lineage":["https://openalex.org/I4210160531"]},{"id":"https://openalex.org/I4210158522","display_name":"PLA Academy of Military Science","ror":"https://ror.org/05ct4s596","country_code":"CN","type":"facility","lineage":["https://openalex.org/I4210158522"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Shengze Li","raw_affiliation_strings":["Chinese Academy of Military Science, Beijing, 100000, China"],"affiliations":[{"raw_affiliation_string":"Chinese Academy of Military Science, Beijing, 100000, China","institution_ids":["https://openalex.org/I4210160531","https://openalex.org/I4210158522","https://openalex.org/I2802541053"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101741623","display_name":"Jieyuan Zhang","orcid":"https://orcid.org/0000-0003-2100-1312"},"institutions":[{"id":"https://openalex.org/I4210158522","display_name":"PLA Academy of Military Science","ror":"https://ror.org/05ct4s596","country_code":"CN","type":"facility","lineage":["https://openalex.org/I4210158522"]},{"id":"https://openalex.org/I4210160531","display_name":"Chinese People's Liberation Army","ror":"https://ror.org/05tf9r976","country_code":"CN","type":"funder","lineage":["https://openalex.org/I4210160531"]},{"id":"https://openalex.org/I2802541053","display_name":"Academy of Military Medical Sciences","ror":"https://ror.org/02bv3c993","country_code":"CN","type":"facility","lineage":["https://openalex.org/I2802541053"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jieyuan Zhang","raw_affiliation_strings":["Chinese Academy of Military Science, Beijing, 100000, China"],"affiliations":[{"raw_affiliation_string":"Chinese Academy of Military Science, Beijing, 100000, China","institution_ids":["https://openalex.org/I4210160531","https://openalex.org/I4210158522","https://openalex.org/I2802541053"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100530526","display_name":"Yan Long","orcid":null},"institutions":[{"id":"https://openalex.org/I6507939","display_name":"China United Network Communications Group (China)","ror":"https://ror.org/028w99c90","country_code":"CN","type":"company","lineage":["https://openalex.org/I6507939"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Long Yan","raw_affiliation_strings":["Data Intelligence Division, China Unicom Digital Technology Co, Beijing, 100032, China"],"affiliations":[{"raw_affiliation_string":"Data Intelligence Division, China Unicom Digital Technology Co, Beijing, 100032, China","institution_ids":["https://openalex.org/I6507939"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5101775095","display_name":"Xinhai Xu","orcid":null},"institutions":[{"id":"https://openalex.org/I4210160531","display_name":"Chinese People's Liberation Army","ror":"https://ror.org/05tf9r976","country_code":"CN","type":"funder","lineage":["https://openalex.org/I4210160531"]},{"id":"https://openalex.org/I2802541053","display_name":"Academy of Military Medical Sciences","ror":"https://ror.org/02bv3c993","country_code":"CN","type":"facility","lineage":["https://openalex.org/I2802541053"]},{"id":"https://openalex.org/I4210158522","display_name":"PLA Academy of Military Science","ror":"https://ror.org/05ct4s596","country_code":"CN","type":"facility","lineage":["https://openalex.org/I4210158522"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xinhai Xu","raw_affiliation_strings":["Chinese Academy of Military Science, Beijing, 100000, China"],"affiliations":[{"raw_affiliation_string":"Chinese Academy of Military Science, Beijing, 100000, China","institution_ids":["https://openalex.org/I4210160531","https://openalex.org/I4210158522","https://openalex.org/I2802541053"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5101763478"],"corresponding_institution_ids":["https://openalex.org/I2802541053","https://openalex.org/I4210158522","https://openalex.org/I4210160531"],"apc_list":{"value":1320,"currency":"GBP","value_usd":1619},"apc_paid":{"value":1320,"currency":"GBP","value_usd":1619},"fwci":2.6124,"has_fulltext":true,"cited_by_count":1,"citation_normalized_percentile":{"value":0.89528852,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":91,"max":95},"biblio":{"volume":"11","issue":"6","first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9994000196456909,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9994000196456909,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10586","display_name":"Robotic Path Planning Algorithms","score":0.9868000149726868,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12794","display_name":"Adaptive Dynamic Programming Control","score":0.9824000000953674,"subfield":{"id":"https://openalex.org/subfields/1703","display_name":"Computational Theory and Mathematics"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/macro","display_name":"Macro","score":0.6844455003738403},{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.6658452153205872},{"id":"https://openalex.org/keywords/reinforcement","display_name":"Reinforcement","score":0.6658393144607544},{"id":"https://openalex.org/keywords/computational-intelligence","display_name":"Computational intelligence","score":0.56552654504776},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.47735098004341125},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.34480875730514526},{"id":"https://openalex.org/keywords/engineering","display_name":"Engineering","score":0.28339290618896484},{"id":"https://openalex.org/keywords/structural-engineering","display_name":"Structural engineering","score":0.08053535223007202}],"concepts":[{"id":"https://openalex.org/C166955791","wikidata":"https://www.wikidata.org/wiki/Q629579","display_name":"Macro","level":2,"score":0.6844455003738403},{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.6658452153205872},{"id":"https://openalex.org/C67203356","wikidata":"https://www.wikidata.org/wiki/Q1321905","display_name":"Reinforcement","level":2,"score":0.6658393144607544},{"id":"https://openalex.org/C139502532","wikidata":"https://www.wikidata.org/wiki/Q1122090","display_name":"Computational intelligence","level":2,"score":0.56552654504776},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.47735098004341125},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.34480875730514526},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.28339290618896484},{"id":"https://openalex.org/C66938386","wikidata":"https://www.wikidata.org/wiki/Q633538","display_name":"Structural engineering","level":1,"score":0.08053535223007202},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1007/s40747-025-01895-9","is_oa":true,"landing_page_url":"https://doi.org/10.1007/s40747-025-01895-9","pdf_url":"https://link.springer.com/content/pdf/10.1007/s40747-025-01895-9.pdf","source":{"id":"https://openalex.org/S3035462843","display_name":"Complex & Intelligent Systems","issn_l":"2198-6053","issn":["2198-6053","2199-4536"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310319900","host_organization_name":"Springer Science+Business Media","host_organization_lineage":["https://openalex.org/P4310319900","https://openalex.org/P4310319965"],"host_organization_lineage_names":["Springer Science+Business Media","Springer Nature"],"type":"journal"},"license":"cc-by-nc-nd","license_id":"https://openalex.org/licenses/cc-by-nc-nd","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Complex &amp; Intelligent Systems","raw_type":"journal-article"},{"id":"pmh:oai:doaj.org/article:cf6601ba17fc4a63b93486a40e461650","is_oa":true,"landing_page_url":"https://doaj.org/article/cf6601ba17fc4a63b93486a40e461650","pdf_url":null,"source":{"id":"https://openalex.org/S112646816","display_name":"SHILAP Revista de lepidopterolog\u00eda","issn_l":"0300-5267","issn":["0300-5267","2340-4078"],"is_oa":true,"is_in_doaj":true,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Complex & Intelligent Systems, Vol 11, Iss 6, Pp 1-17 (2025)","raw_type":"article"}],"best_oa_location":{"id":"doi:10.1007/s40747-025-01895-9","is_oa":true,"landing_page_url":"https://doi.org/10.1007/s40747-025-01895-9","pdf_url":"https://link.springer.com/content/pdf/10.1007/s40747-025-01895-9.pdf","source":{"id":"https://openalex.org/S3035462843","display_name":"Complex & Intelligent Systems","issn_l":"2198-6053","issn":["2198-6053","2199-4536"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310319900","host_organization_name":"Springer Science+Business Media","host_organization_lineage":["https://openalex.org/P4310319900","https://openalex.org/P4310319965"],"host_organization_lineage_names":["Springer Science+Business Media","Springer Nature"],"type":"journal"},"license":"cc-by-nc-nd","license_id":"https://openalex.org/licenses/cc-by-nc-nd","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Complex &amp; Intelligent Systems","raw_type":"journal-article"},"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G6678289858","display_name":null,"funder_award_id":"No.62102444","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"}],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4409590668.pdf","grobid_xml":"https://content.openalex.org/works/W4409590668.grobid-xml"},"referenced_works_count":24,"referenced_works":["https://openalex.org/W1458771408","https://openalex.org/W1590762254","https://openalex.org/W1978942630","https://openalex.org/W2019608062","https://openalex.org/W2089211960","https://openalex.org/W2109910161","https://openalex.org/W2575705757","https://openalex.org/W2963411833","https://openalex.org/W2964118262","https://openalex.org/W2964227312","https://openalex.org/W3080641768","https://openalex.org/W3090863380","https://openalex.org/W3173672478","https://openalex.org/W3186772940","https://openalex.org/W4226249635","https://openalex.org/W4249441547","https://openalex.org/W4298023569","https://openalex.org/W4306680355","https://openalex.org/W4386266197","https://openalex.org/W4386600478","https://openalex.org/W4391589287","https://openalex.org/W4401823493","https://openalex.org/W4401834776","https://openalex.org/W6601101894"],"related_works":["https://openalex.org/W2030816003","https://openalex.org/W4239992647","https://openalex.org/W2150013480","https://openalex.org/W4310083477","https://openalex.org/W2328553770","https://openalex.org/W1554458299","https://openalex.org/W81423522","https://openalex.org/W2920061524","https://openalex.org/W1509860481","https://openalex.org/W2488264085"],"abstract_inverted_index":{"The":[0,163],"large":[1,186],"action":[2,55,107,187],"space":[3,108],"is":[4],"a":[5,39,53],"key":[6],"challenge":[7],"in":[8,20,150,158,175,183],"reinforcement":[9,111,176],"learning.":[10],"Although":[11],"hierarchical":[12,35,170],"methods":[13],"have":[14],"been":[15],"proven":[16],"to":[17,37,146,178],"be":[18],"effective":[19],"addressing":[21],"this":[22],"issue,":[23],"they":[24],"are":[25,74,89],"not":[26],"fully":[27],"explored.":[28],"This":[29,50],"paper":[30],"combines":[31],"domain":[32],"knowledge":[33],"with":[34,185],"concepts":[36],"propose":[38],"novel":[40],"Hierarchical":[41],"Reinforcement":[42],"Learning":[43],"framework":[44,51,137],"based":[45,118],"on":[46,119,125],"macro":[47,54,65,80,85,173],"actions":[48,63,73,81,86,117,174],"(HRL-MA).":[49],"includes":[52],"mapping":[56],"model":[57],"that":[58,134],"abstracts":[59],"sequences":[60],"of":[61,102,109,160,168],"micro":[62],"into":[64,76],"actions,":[66],"thereby":[67],"simplifying":[68],"the":[69,100,106,110,120,126,135,166],"decision-making":[70,181],"process.":[71],"Macro":[72],"divided":[75],"two":[77],"categories:":[78],"combat":[79],"(CMA)":[82],"and":[83,96,131,172],"non-combat":[84],"(NO-CMA).":[87],"NO-CMA":[88],"driven":[90],"by":[91],"decision":[92],"tree-based":[93],"logical":[94],"rules":[95],"provide":[97],"conditions":[98],"for":[99],"execution":[101],"CMA.":[103],"CMA":[104],"form":[105],"learning":[112,177],"algorithm,":[113],"which":[114],"dynamically":[115],"selects":[116],"current":[121],"state.":[122],"Comprehensive":[123],"tests":[124],"StarCraft":[127],"II":[128],"maps":[129],"Simple64":[130],"AbyssalReefLE":[132],"demonstrate":[133],"HRL-MA":[136,153],"exhibits":[138],"superior":[139],"performance,":[140],"achieving":[141],"higher":[142],"win":[143],"rates":[144],"compared":[145],"baseline":[147,156],"algorithms.":[148],"Furthermore,":[149],"mini-game":[151],"scenarios,":[152],"consistently":[154],"outperforms":[155],"algorithms":[157],"terms":[159],"reward":[161],"scores.":[162],"findings":[164],"highlight":[165],"effectiveness":[167],"integrating":[169],"structures":[171],"manage":[179],"complex":[180],"tasks":[182],"environments":[184],"spaces.":[188]},"counts_by_year":[{"year":2025,"cited_by_count":1}],"updated_date":"2026-03-20T23:20:44.827607","created_date":"2025-10-10T00:00:00"}
