{"id":"https://openalex.org/W4401507809","doi":"https://doi.org/10.1109/tnnls.2024.3425809","title":"Guided Cooperation in Hierarchical Reinforcement Learning via Model-Based Rollout","display_name":"Guided Cooperation in Hierarchical Reinforcement Learning via Model-Based Rollout","publication_year":2024,"publication_date":"2024-08-12","ids":{"openalex":"https://openalex.org/W4401507809","doi":"https://doi.org/10.1109/tnnls.2024.3425809","pmid":"https://pubmed.ncbi.nlm.nih.gov/39133586"},"language":"en","primary_location":{"id":"doi:10.1109/tnnls.2024.3425809","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tnnls.2024.3425809","pdf_url":null,"source":{"id":"https://openalex.org/S4210175523","display_name":"IEEE Transactions on Neural Networks and Learning Systems","issn_l":"2162-237X","issn":["2162-237X","2162-2388"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Neural Networks and Learning Systems","raw_type":"journal-article"},"type":"article","indexed_in":["crossref","pubmed"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5100324824","display_name":"Haoran Wang","orcid":"https://orcid.org/0000-0002-4622-0119"},"institutions":[{"id":"https://openalex.org/I116953780","display_name":"Tongji University","ror":"https://ror.org/03rc6as71","country_code":"CN","type":"education","lineage":["https://openalex.org/I116953780"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Haoran Wang","raw_affiliation_strings":["Department of Computer Science and Technology, College of Electronics and Information Engineering, Tongji University, Shanghai, China"],"affiliations":[{"raw_affiliation_string":"Department of Computer Science and Technology, College of Electronics and Information Engineering, Tongji University, Shanghai, China","institution_ids":["https://openalex.org/I116953780"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5039895455","display_name":"Zeshen Tang","orcid":"https://orcid.org/0000-0001-8765-6464"},"institutions":[{"id":"https://openalex.org/I116953780","display_name":"Tongji University","ror":"https://ror.org/03rc6as71","country_code":"CN","type":"education","lineage":["https://openalex.org/I116953780"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Zeshen Tang","raw_affiliation_strings":["Department of Computer Science and Technology, College of Electronics and Information Engineering, Tongji University, Shanghai, China"],"affiliations":[{"raw_affiliation_string":"Department of Computer Science and Technology, College of Electronics and Information Engineering, Tongji University, Shanghai, China","institution_ids":["https://openalex.org/I116953780"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5102966948","display_name":"Yaoru Sun","orcid":null},"institutions":[{"id":"https://openalex.org/I116953780","display_name":"Tongji University","ror":"https://ror.org/03rc6as71","country_code":"CN","type":"education","lineage":["https://openalex.org/I116953780"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yaoru Sun","raw_affiliation_strings":["Department of Computer Science and Technology, College of Electronics and Information Engineering, Tongji University, Shanghai, China"],"affiliations":[{"raw_affiliation_string":"Department of Computer Science and Technology, College of Electronics and Information Engineering, Tongji University, Shanghai, China","institution_ids":["https://openalex.org/I116953780"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100321464","display_name":"Fang Wang","orcid":"https://orcid.org/0000-0003-1987-9150"},"institutions":[{"id":"https://openalex.org/I59433898","display_name":"Brunel University of London","ror":"https://ror.org/00dn4t376","country_code":"GB","type":"education","lineage":["https://openalex.org/I124357947","https://openalex.org/I59433898"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Fang Wang","raw_affiliation_strings":["Department of Computer Science, Brunel University London, Uxbridge, U.K"],"affiliations":[{"raw_affiliation_string":"Department of Computer Science, Brunel University London, Uxbridge, U.K","institution_ids":["https://openalex.org/I59433898"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100347374","display_name":"Siyu Zhang","orcid":"https://orcid.org/0000-0002-0001-0204"},"institutions":[{"id":"https://openalex.org/I116953780","display_name":"Tongji University","ror":"https://ror.org/03rc6as71","country_code":"CN","type":"education","lineage":["https://openalex.org/I116953780"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Siyu Zhang","raw_affiliation_strings":["Department of Computer Science and Technology, College of Electronics and Information Engineering, Tongji University, Shanghai, China"],"affiliations":[{"raw_affiliation_string":"Department of Computer Science and Technology, College of Electronics and Information Engineering, Tongji University, Shanghai, China","institution_ids":["https://openalex.org/I116953780"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5102932116","display_name":"Yeming Chen","orcid":"https://orcid.org/0009-0005-5515-1943"},"institutions":[{"id":"https://openalex.org/I116953780","display_name":"Tongji University","ror":"https://ror.org/03rc6as71","country_code":"CN","type":"education","lineage":["https://openalex.org/I116953780"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yeming Chen","raw_affiliation_strings":["Department of Computer Science and Technology, College of Electronics and Information Engineering, Tongji University, Shanghai, China"],"affiliations":[{"raw_affiliation_string":"Department of Computer Science and Technology, College of Electronics and Information Engineering, Tongji University, Shanghai, China","institution_ids":["https://openalex.org/I116953780"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5100324824"],"corresponding_institution_ids":["https://openalex.org/I116953780"],"apc_list":null,"apc_paid":null,"fwci":1.0878,"has_fulltext":false,"cited_by_count":3,"citation_normalized_percentile":{"value":0.80912395,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":96,"max":97},"biblio":{"volume":"36","issue":"5","first_page":"8455","last_page":"8469"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.8676000237464905,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.8676000237464905,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.6359630823135376},{"id":"https://openalex.org/keywords/reinforcement","display_name":"Reinforcement","score":0.564522385597229},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.46315398812294006},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.256552517414093},{"id":"https://openalex.org/keywords/psychology","display_name":"Psychology","score":0.25505998730659485},{"id":"https://openalex.org/keywords/social-psychology","display_name":"Social psychology","score":0.09748008847236633}],"concepts":[{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.6359630823135376},{"id":"https://openalex.org/C67203356","wikidata":"https://www.wikidata.org/wiki/Q1321905","display_name":"Reinforcement","level":2,"score":0.564522385597229},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.46315398812294006},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.256552517414093},{"id":"https://openalex.org/C15744967","wikidata":"https://www.wikidata.org/wiki/Q9418","display_name":"Psychology","level":0,"score":0.25505998730659485},{"id":"https://openalex.org/C77805123","wikidata":"https://www.wikidata.org/wiki/Q161272","display_name":"Social psychology","level":1,"score":0.09748008847236633}],"mesh":[],"locations_count":3,"locations":[{"id":"doi:10.1109/tnnls.2024.3425809","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tnnls.2024.3425809","pdf_url":null,"source":{"id":"https://openalex.org/S4210175523","display_name":"IEEE Transactions on Neural Networks and Learning Systems","issn_l":"2162-237X","issn":["2162-237X","2162-2388"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Neural Networks and Learning Systems","raw_type":"journal-article"},{"id":"pmid:39133586","is_oa":false,"landing_page_url":"https://pubmed.ncbi.nlm.nih.gov/39133586","pdf_url":null,"source":{"id":"https://openalex.org/S4306525036","display_name":"PubMed","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1299303238","host_organization_name":"National Institutes of Health","host_organization_lineage":["https://openalex.org/I1299303238"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE transactions on neural networks and learning systems","raw_type":null},{"id":"pmh:oai:bura.brunel.ac.uk:2438/30166","is_oa":false,"landing_page_url":"https://bura.brunel.ac.uk/handle/2438/30166","pdf_url":null,"source":{"id":"https://openalex.org/S4306401473","display_name":"Brunel University Research Archive (BURA) (Brunel University London)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I59433898","host_organization_name":"Brunel University of London","host_organization_lineage":["https://openalex.org/I59433898"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":57,"referenced_works":["https://openalex.org/W1923344279","https://openalex.org/W1980035368","https://openalex.org/W2090170171","https://openalex.org/W2416477367","https://openalex.org/W2963781688","https://openalex.org/W3038629022","https://openalex.org/W3038822267","https://openalex.org/W3127756416","https://openalex.org/W3132408792","https://openalex.org/W3134939669","https://openalex.org/W3136456184","https://openalex.org/W3168202624","https://openalex.org/W3190132124","https://openalex.org/W3208890675","https://openalex.org/W4224925640","https://openalex.org/W4312422030","https://openalex.org/W4316041871","https://openalex.org/W4385453680","https://openalex.org/W4391259941","https://openalex.org/W6675999342","https://openalex.org/W6684488266","https://openalex.org/W6684921986","https://openalex.org/W6734215269","https://openalex.org/W6735913928","https://openalex.org/W6736368053","https://openalex.org/W6740801417","https://openalex.org/W6748519856","https://openalex.org/W6748839928","https://openalex.org/W6748848655","https://openalex.org/W6751494529","https://openalex.org/W6752089545","https://openalex.org/W6753183898","https://openalex.org/W6753264383","https://openalex.org/W6755459604","https://openalex.org/W6756303580","https://openalex.org/W6759871227","https://openalex.org/W6763704811","https://openalex.org/W6764053384","https://openalex.org/W6764173040","https://openalex.org/W6766413688","https://openalex.org/W6768220214","https://openalex.org/W6772121735","https://openalex.org/W6772264461","https://openalex.org/W6774915356","https://openalex.org/W6777635358","https://openalex.org/W6777656069","https://openalex.org/W6779715229","https://openalex.org/W6783814031","https://openalex.org/W6784431470","https://openalex.org/W6786644593","https://openalex.org/W6790486821","https://openalex.org/W6791413555","https://openalex.org/W6802965437","https://openalex.org/W6845909825","https://openalex.org/W6846453103","https://openalex.org/W6850848066","https://openalex.org/W6853445487"],"related_works":["https://openalex.org/W4391375266","https://openalex.org/W2748952813","https://openalex.org/W2920061524","https://openalex.org/W4310083477","https://openalex.org/W2328553770","https://openalex.org/W1977959518","https://openalex.org/W2038908348","https://openalex.org/W2107890255","https://openalex.org/W2106552856","https://openalex.org/W2145821588"],"abstract_inverted_index":{"Goal-conditioned":[0],"hierarchical":[1,38,214],"reinforcement":[2,16,215],"learning":[3,17,216],"(HRL)":[4],"presents":[5],"a":[6,59,122,126,132,144,210],"promising":[7],"approach":[8],"for":[9],"enabling":[10],"effective":[11,139],"exploration":[12],"in":[13,37,191],"complex,":[14],"long-horizon":[15],"(RL)":[18],"tasks":[19],"through":[20],"temporal":[21],"abstraction.":[22],"Empirically,":[23],"heightened":[24],"interlevel":[25,54,197],"communication":[26],"and":[27,33,82,113,224,231,239],"coordination":[28],"can":[29],"induce":[30],"more":[31,133,229],"stable":[32,134,230],"robust":[34,232],"policy":[35,136,170,233],"improvement":[36,234],"systems.":[39],"Yet,":[40],"most":[41],"existing":[42],"goal-conditioned":[43,61],"HRL":[44,62],"algorithms":[45],"have":[46],"primarily":[47],"focused":[48],"on":[49],"the":[50,89,92,110,154,161,167,172,205],"subgoal":[51],"discovery,":[52],"neglecting":[53],"cooperation.":[55],"Here,":[56],"we":[57,142,159],"propose":[58,143],"novel":[60],"framework":[63,208],"named":[64],"Guided":[65],"Cooperation":[66],"via":[67,98],"Model-Based":[68],"Rollout":[69],"(GCMR;":[70],"code":[71],"is":[72],"available":[73],"at":[74],"https://github.com/HaoranWang-TJ/GCMR_ACLG_official),":[75],"aiming":[76],"to":[77,106,131,138,152,183,195],"bridge":[78],"interlayer":[79],"information":[80,181],"synchronization":[81],"cooperation":[83,198],"by":[84,109,218],"exploiting":[85],"forward":[86],"dynamics.":[87],"First,":[88],"GCMR":[90,192,207],"mitigates":[91],"state-transition":[93],"error":[94],"within":[95],"off-policy":[96],"correction":[97],"model-based":[99],"rollout,":[100],"thereby":[101,177],"enhancing":[102],"sample":[103],"efficiency.":[104],"Second,":[105],"prevent":[107],"disruption":[108],"unseen":[111],"subgoals":[112],"states,":[114],"lower":[115,155,168],"level":[116,150,156,169,174],"Q-function":[117],"gradients":[118],"are":[119,193],"constrained":[120],"using":[121,148,171],"gradient":[123],"penalty":[124],"with":[125,209,236],"model-inferred":[127],"upper":[128],"bound,":[129],"leading":[130],"behavioral":[135],"conducive":[137],"exploration.":[140],"Third,":[141],"one-step":[145],"rollout-based":[146],"planning,":[147],"higher":[149,173],"critics":[151],"guide":[153],"policy.":[157],"Specifically,":[158],"estimate":[160],"value":[162],"of":[163,166,213],"future":[164],"states":[165],"critic":[175],"function,":[176],"transmitting":[178],"global":[179],"task":[180],"downward":[182],"avoid":[184],"local":[185],"pitfalls.":[186],"These":[187],"three":[188],"critical":[189],"components":[190],"expected":[194],"facilitate":[196],"significantly.":[199],"Experimental":[200],"results":[201],"demonstrate":[202],"that":[203],"incorporating":[204],"proposed":[206],"disentangled":[211],"variant":[212],"guided":[217],"landmarks":[219],"(HIGL),":[220],"namely,":[221],"adjacency":[222],"constraint":[223],"landmark-guided":[225],"planning":[226],"(ACLG),":[227],"yields":[228],"compared":[235],"various":[237],"baselines":[238],"significantly":[240],"outperforms":[241],"previous":[242],"state-of-the-art":[243],"(SOTA)":[244],"algorithms.":[245]},"counts_by_year":[{"year":2025,"cited_by_count":3}],"updated_date":"2025-12-21T01:58:51.020947","created_date":"2025-10-10T00:00:00"}
