{"id":"https://openalex.org/W7123362426","doi":"https://doi.org/10.1109/tsmc.2025.3646451","title":"Subgoal-Based Hierarchical Reinforcement Learning for Multiagent Collaboration","display_name":"Subgoal-Based Hierarchical Reinforcement Learning for Multiagent Collaboration","publication_year":2026,"publication_date":"2026-01-12","ids":{"openalex":"https://openalex.org/W7123362426","doi":"https://doi.org/10.1109/tsmc.2025.3646451"},"language":null,"primary_location":{"id":"doi:10.1109/tsmc.2025.3646451","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tsmc.2025.3646451","pdf_url":null,"source":{"id":"https://openalex.org/S4210209078","display_name":"IEEE Transactions on Systems Man and Cybernetics Systems","issn_l":"2168-2216","issn":["2168-2216","2168-2232"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Systems, Man, and Cybernetics: Systems","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5122884700","display_name":"Cheng Xu","orcid":null},"institutions":[{"id":"https://openalex.org/I92403157","display_name":"University of Science and Technology Beijing","ror":"https://ror.org/02egmk993","country_code":"CN","type":"education","lineage":["https://openalex.org/I92403157"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Cheng Xu","raw_affiliation_strings":["School of Computer and Communication Engineering, University of Science and Technology Beijing, Beijing, China"],"raw_orcid":"https://orcid.org/0000-0003-1624-5494","affiliations":[{"raw_affiliation_string":"School of Computer and Communication Engineering, University of Science and Technology Beijing, Beijing, China","institution_ids":["https://openalex.org/I92403157"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5060868822","display_name":"Yuchen Shi","orcid":"https://orcid.org/0009-0007-2790-9678"},"institutions":[{"id":"https://openalex.org/I92403157","display_name":"University of Science and Technology Beijing","ror":"https://ror.org/02egmk993","country_code":"CN","type":"education","lineage":["https://openalex.org/I92403157"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yuchen Shi","raw_affiliation_strings":["School of Computer and Communication Engineering, University of Science and Technology Beijing, Beijing, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"School of Computer and Communication Engineering, University of Science and Technology Beijing, Beijing, China","institution_ids":["https://openalex.org/I92403157"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5122884765","display_name":"Changtian Zhang","orcid":null},"institutions":[{"id":"https://openalex.org/I92403157","display_name":"University of Science and Technology Beijing","ror":"https://ror.org/02egmk993","country_code":"CN","type":"education","lineage":["https://openalex.org/I92403157"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Changtian Zhang","raw_affiliation_strings":["School of Computer and Communication Engineering, University of Science and Technology Beijing, Beijing, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"School of Computer and Communication Engineering, University of Science and Technology Beijing, Beijing, China","institution_ids":["https://openalex.org/I92403157"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5087067755","display_name":"Ran Wang","orcid":"https://orcid.org/0000-0002-9530-8838"},"institutions":[{"id":"https://openalex.org/I92403157","display_name":"University of Science and Technology Beijing","ror":"https://ror.org/02egmk993","country_code":"CN","type":"education","lineage":["https://openalex.org/I92403157"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Ran Wang","raw_affiliation_strings":["School of Computer and Communication Engineering, University of Science and Technology Beijing, Beijing, China"],"raw_orcid":"https://orcid.org/0000-0002-9530-8838","affiliations":[{"raw_affiliation_string":"School of Computer and Communication Engineering, University of Science and Technology Beijing, Beijing, China","institution_ids":["https://openalex.org/I92403157"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5122871449","display_name":"Shihong Duan","orcid":null},"institutions":[{"id":"https://openalex.org/I92403157","display_name":"University of Science and Technology Beijing","ror":"https://ror.org/02egmk993","country_code":"CN","type":"education","lineage":["https://openalex.org/I92403157"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Shihong Duan","raw_affiliation_strings":["School of Computer and Communication Engineering, University of Science and Technology Beijing, Beijing, China"],"raw_orcid":"https://orcid.org/0000-0001-8162-4269","affiliations":[{"raw_affiliation_string":"School of Computer and Communication Engineering, University of Science and Technology Beijing, Beijing, China","institution_ids":["https://openalex.org/I92403157"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5034784614","display_name":"Yadong Wan","orcid":null},"institutions":[{"id":"https://openalex.org/I92403157","display_name":"University of Science and Technology Beijing","ror":"https://ror.org/02egmk993","country_code":"CN","type":"education","lineage":["https://openalex.org/I92403157"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yadong Wan","raw_affiliation_strings":["School of Computer and Communication Engineering, University of Science and Technology Beijing, Beijing, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"School of Computer and Communication Engineering, University of Science and Technology Beijing, Beijing, China","institution_ids":["https://openalex.org/I92403157"]}]},{"author_position":"last","author":{"id":null,"display_name":"Xiaotong Zhang","orcid":"https://orcid.org/0000-0001-7600-7231"},"institutions":[{"id":"https://openalex.org/I92403157","display_name":"University of Science and Technology Beijing","ror":"https://ror.org/02egmk993","country_code":"CN","type":"education","lineage":["https://openalex.org/I92403157"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xiaotong Zhang","raw_affiliation_strings":["School of Computer and Communication Engineering, University of Science and Technology Beijing, Beijing, China"],"raw_orcid":"https://orcid.org/0000-0001-7600-7231","affiliations":[{"raw_affiliation_string":"School of Computer and Communication Engineering, University of Science and Technology Beijing, Beijing, China","institution_ids":["https://openalex.org/I92403157"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":7,"corresponding_author_ids":["https://openalex.org/A5122884700"],"corresponding_institution_ids":["https://openalex.org/I92403157"],"apc_list":null,"apc_paid":null,"fwci":32.8796,"has_fulltext":false,"cited_by_count":1,"citation_normalized_percentile":{"value":0.98446297,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":96,"max":98},"biblio":{"volume":"56","issue":"2","first_page":"1203","last_page":"1215"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.7942000031471252,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.7942000031471252,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11704","display_name":"Mobile Crowdsensing and Crowdsourcing","score":0.027699999511241913,"subfield":{"id":"https://openalex.org/subfields/1706","display_name":"Computer Science Applications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10203","display_name":"Recommender Systems and Techniques","score":0.012199999764561653,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.7907999753952026},{"id":"https://openalex.org/keywords/flexibility","display_name":"Flexibility (engineering)","score":0.7400000095367432},{"id":"https://openalex.org/keywords/convergence","display_name":"Convergence (economics)","score":0.4740999937057495},{"id":"https://openalex.org/keywords/stability","display_name":"Stability (learning theory)","score":0.46970000863075256},{"id":"https://openalex.org/keywords/curse-of-dimensionality","display_name":"Curse of dimensionality","score":0.4284999966621399},{"id":"https://openalex.org/keywords/multi-agent-system","display_name":"Multi-agent system","score":0.3968000113964081},{"id":"https://openalex.org/keywords/architecture","display_name":"Architecture","score":0.3587000072002411}],"concepts":[{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.7907999753952026},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7796000242233276},{"id":"https://openalex.org/C2780598303","wikidata":"https://www.wikidata.org/wiki/Q65921492","display_name":"Flexibility (engineering)","level":2,"score":0.7400000095367432},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5101000070571899},{"id":"https://openalex.org/C2777303404","wikidata":"https://www.wikidata.org/wiki/Q759757","display_name":"Convergence (economics)","level":2,"score":0.4740999937057495},{"id":"https://openalex.org/C112972136","wikidata":"https://www.wikidata.org/wiki/Q7595718","display_name":"Stability (learning theory)","level":2,"score":0.46970000863075256},{"id":"https://openalex.org/C111030470","wikidata":"https://www.wikidata.org/wiki/Q1430460","display_name":"Curse of dimensionality","level":2,"score":0.4284999966621399},{"id":"https://openalex.org/C41550386","wikidata":"https://www.wikidata.org/wiki/Q529909","display_name":"Multi-agent system","level":2,"score":0.3968000113964081},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.3855000138282776},{"id":"https://openalex.org/C123657996","wikidata":"https://www.wikidata.org/wiki/Q12271","display_name":"Architecture","level":2,"score":0.3587000072002411},{"id":"https://openalex.org/C198531522","wikidata":"https://www.wikidata.org/wiki/Q485146","display_name":"Sample (material)","level":2,"score":0.35679998993873596},{"id":"https://openalex.org/C98045186","wikidata":"https://www.wikidata.org/wiki/Q205663","display_name":"Process (computing)","level":2,"score":0.3472000062465668},{"id":"https://openalex.org/C2778445095","wikidata":"https://www.wikidata.org/wiki/Q18354077","display_name":"Sample complexity","level":2,"score":0.32499998807907104},{"id":"https://openalex.org/C137703981","wikidata":"https://www.wikidata.org/wiki/Q4692093","display_name":"Agent architecture","level":3,"score":0.30059999227523804},{"id":"https://openalex.org/C120314980","wikidata":"https://www.wikidata.org/wiki/Q180634","display_name":"Distributed computing","level":1,"score":0.28600001335144043},{"id":"https://openalex.org/C68339613","wikidata":"https://www.wikidata.org/wiki/Q1549489","display_name":"Speedup","level":2,"score":0.25429999828338623},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.2533000111579895}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/tsmc.2025.3646451","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tsmc.2025.3646451","pdf_url":null,"source":{"id":"https://openalex.org/S4210209078","display_name":"IEEE Transactions on Systems Man and Cybernetics Systems","issn_l":"2168-2216","issn":["2168-2216","2168-2232"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Systems, Man, and Cybernetics: Systems","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G213667498","display_name":null,"funder_award_id":"2023A1515140071","funder_id":"https://openalex.org/F4320337111","funder_display_name":"Basic and Applied Basic Research Foundation of Guangdong Province"},{"id":"https://openalex.org/G3730710168","display_name":null,"funder_award_id":"202006465043","funder_id":"https://openalex.org/F4320322725","funder_display_name":"China Scholarship Council"},{"id":"https://openalex.org/G4708477135","display_name":null,"funder_award_id":"202306460078","funder_id":"https://openalex.org/F4320322725","funder_display_name":"China Scholarship Council"},{"id":"https://openalex.org/G6100071962","display_name":null,"funder_award_id":"62101029","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G7615683452","display_name":null,"funder_award_id":"2023A1515140071","funder_id":"https://openalex.org/F4320320671","funder_display_name":"National Research Foundation"}],"funders":[{"id":"https://openalex.org/F4320320671","display_name":"National Research Foundation","ror":"https://ror.org/05s0g1g46"},{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"},{"id":"https://openalex.org/F4320322725","display_name":"China Scholarship Council","ror":"https://ror.org/04atp4p48"},{"id":"https://openalex.org/F4320337111","display_name":"Basic and Applied Basic Research Foundation of Guangdong Province","ror":null}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":14,"referenced_works":["https://openalex.org/W2109910161","https://openalex.org/W2617547828","https://openalex.org/W2807741983","https://openalex.org/W2913662073","https://openalex.org/W4283170178","https://openalex.org/W4319165238","https://openalex.org/W4324060653","https://openalex.org/W4377695283","https://openalex.org/W4382202831","https://openalex.org/W4383108302","https://openalex.org/W4386918877","https://openalex.org/W4396690897","https://openalex.org/W4399055968","https://openalex.org/W4415796807"],"related_works":[],"abstract_inverted_index":{"Recent":[0],"advancements":[1],"in":[2,17,98,112,149,162],"reinforcement":[3],"learning":[4],"(RL)":[5],"have":[6],"driven":[7],"progress":[8],"across":[9],"various":[10],"domains;":[11],"however,":[12],"RL":[13,36,136],"algorithms":[14,137],"often":[15],"struggle":[16],"complex":[18,44,164],"multiagent":[19,56,113,150],"environments":[20],"due":[21],"to":[22,100],"challenges":[23],"such":[24],"as":[25],"instability,":[26],"low":[27],"sample":[28,85],"efficiency,":[29],"and":[30,80,87,146,157],"the":[31,106,155],"curse":[32],"of":[33,109,159],"dimensionality.":[34],"Hierarchical":[35],"(HRL)":[37],"provides":[38],"a":[39,52,63,91,121],"structured":[40],"framework":[41],"for":[42,55],"decomposing":[43],"tasks":[45],"into":[46],"more":[47,127],"manageable":[48],"subtasks,":[49],"making":[50],"it":[51],"promising":[53],"approach":[54,141],"systems.":[57],"In":[58],"this":[59],"article,":[60],"we":[61,89,104],"introduce":[62],"novel":[64],"hierarchical":[65,118],"architecture":[66,119],"that":[67,95,139],"autonomously":[68],"generates":[69],"effective":[70,128],"subgoals":[71,97],"without":[72],"explicit":[73],"constraints,":[74],"thereby":[75,125],"enhancing":[76],"both":[77],"training":[78],"stability":[79],"adaptability.":[81],"To":[82],"further":[83],"improve":[84],"efficiency":[86],"adaptability,":[88],"propose":[90],"dynamic":[92],"goal-generation":[93],"strategy":[94,129],"adjusts":[96],"response":[99],"environmental":[101],"changes.":[102],"Additionally,":[103],"address":[105],"critical":[107],"challenge":[108],"credit":[110],"assignment":[111],"settings":[114],"by":[115],"integrating":[116],"our":[117,140,160],"with":[120],"modified":[122],"QMIX":[123],"network,":[124],"facilitating":[126],"coordination.":[130],"Extensive":[131],"comparative":[132],"experiments":[133],"against":[134],"state-of-the-art":[135],"demonstrate":[138],"achieves":[142],"superior":[143],"convergence":[144],"speed":[145],"overall":[147],"performance":[148],"environments.":[151],"These":[152],"results":[153],"validate":[154],"effectiveness":[156],"flexibility":[158],"method":[161],"handling":[163],"coordination":[165],"tasks.":[166],"The":[167],"implementation":[168],"is":[169],"publicly":[170],"available":[171],"at":[172],"https://github.com/SICC-Group/GMAH":[173]},"counts_by_year":[{"year":2026,"cited_by_count":1}],"updated_date":"2026-05-03T08:25:01.440150","created_date":"2026-01-14T00:00:00"}
