{"id":"https://openalex.org/W4410427822","doi":"https://doi.org/10.1109/tnnls.2025.3566548","title":"A Policy-Guided Reinforcement Learning Method for Encirclement Control in Multiobstacle Environment","display_name":"A Policy-Guided Reinforcement Learning Method for Encirclement Control in Multiobstacle Environment","publication_year":2025,"publication_date":"2025-05-16","ids":{"openalex":"https://openalex.org/W4410427822","doi":"https://doi.org/10.1109/tnnls.2025.3566548","pmid":"https://pubmed.ncbi.nlm.nih.gov/40378032"},"language":"en","primary_location":{"id":"doi:10.1109/tnnls.2025.3566548","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tnnls.2025.3566548","pdf_url":null,"source":{"id":"https://openalex.org/S4210175523","display_name":"IEEE Transactions on Neural Networks and Learning Systems","issn_l":"2162-237X","issn":["2162-237X","2162-2388"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Neural Networks and Learning Systems","raw_type":"journal-article"},"type":"article","indexed_in":["crossref","pubmed"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5088439780","display_name":"Fandi Gou","orcid":null},"institutions":[{"id":"https://openalex.org/I183067930","display_name":"Shanghai Jiao Tong University","ror":"https://ror.org/0220qvk04","country_code":"CN","type":"education","lineage":["https://openalex.org/I183067930"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Fandi Gou","raw_affiliation_strings":["Department of Automation, Shanghai Jiao Tong University, Shanghai, China"],"raw_orcid":"https://orcid.org/0009-0002-6149-3382","affiliations":[{"raw_affiliation_string":"Department of Automation, Shanghai Jiao Tong University, Shanghai, China","institution_ids":["https://openalex.org/I183067930"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5065262820","display_name":"Haikuo Du","orcid":null},"institutions":[{"id":"https://openalex.org/I183067930","display_name":"Shanghai Jiao Tong University","ror":"https://ror.org/0220qvk04","country_code":"CN","type":"education","lineage":["https://openalex.org/I183067930"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Haikuo Du","raw_affiliation_strings":["Department of Automation, Shanghai Jiao Tong University, Shanghai, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Department of Automation, Shanghai Jiao Tong University, Shanghai, China","institution_ids":["https://openalex.org/I183067930"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5069998161","display_name":"Chenyu Zhao","orcid":null},"institutions":[{"id":"https://openalex.org/I183067930","display_name":"Shanghai Jiao Tong University","ror":"https://ror.org/0220qvk04","country_code":"CN","type":"education","lineage":["https://openalex.org/I183067930"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Chenyu Zhao","raw_affiliation_strings":["Department of Electronic Engineering, Shanghai Jiao Tong University, Shanghai, China"],"raw_orcid":"https://orcid.org/0009-0007-6328-2788","affiliations":[{"raw_affiliation_string":"Department of Electronic Engineering, Shanghai Jiao Tong University, Shanghai, China","institution_ids":["https://openalex.org/I183067930"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5012941986","display_name":"Yunze Cai","orcid":"https://orcid.org/0000-0002-1783-2984"},"institutions":[{"id":"https://openalex.org/I183067930","display_name":"Shanghai Jiao Tong University","ror":"https://ror.org/0220qvk04","country_code":"CN","type":"education","lineage":["https://openalex.org/I183067930"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yunze Cai","raw_affiliation_strings":["Department of Automation, Shanghai Jiao Tong University, Shanghai, China"],"raw_orcid":"https://orcid.org/0000-0002-1783-2984","affiliations":[{"raw_affiliation_string":"Department of Automation, Shanghai Jiao Tong University, Shanghai, China","institution_ids":["https://openalex.org/I183067930"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":2.6985,"has_fulltext":false,"cited_by_count":3,"citation_normalized_percentile":{"value":0.8974406,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":91,"max":99},"biblio":{"volume":"36","issue":"9","first_page":"17034","last_page":"17046"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T14011","display_name":"Elevator Systems and Control","score":0.9866999983787537,"subfield":{"id":"https://openalex.org/subfields/2207","display_name":"Control and Systems Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T14011","display_name":"Elevator Systems and Control","score":0.9866999983787537,"subfield":{"id":"https://openalex.org/subfields/2207","display_name":"Control and Systems Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.7273101806640625},{"id":"https://openalex.org/keywords/reinforcement","display_name":"Reinforcement","score":0.6102182269096375},{"id":"https://openalex.org/keywords/control","display_name":"Control (management)","score":0.5946006774902344},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.45116475224494934},{"id":"https://openalex.org/keywords/psychology","display_name":"Psychology","score":0.25714540481567383},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.23334449529647827},{"id":"https://openalex.org/keywords/social-psychology","display_name":"Social psychology","score":0.10657668113708496}],"concepts":[{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.7273101806640625},{"id":"https://openalex.org/C67203356","wikidata":"https://www.wikidata.org/wiki/Q1321905","display_name":"Reinforcement","level":2,"score":0.6102182269096375},{"id":"https://openalex.org/C2775924081","wikidata":"https://www.wikidata.org/wiki/Q55608371","display_name":"Control (management)","level":2,"score":0.5946006774902344},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.45116475224494934},{"id":"https://openalex.org/C15744967","wikidata":"https://www.wikidata.org/wiki/Q9418","display_name":"Psychology","level":0,"score":0.25714540481567383},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.23334449529647827},{"id":"https://openalex.org/C77805123","wikidata":"https://www.wikidata.org/wiki/Q161272","display_name":"Social psychology","level":1,"score":0.10657668113708496}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1109/tnnls.2025.3566548","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tnnls.2025.3566548","pdf_url":null,"source":{"id":"https://openalex.org/S4210175523","display_name":"IEEE Transactions on Neural Networks and Learning Systems","issn_l":"2162-237X","issn":["2162-237X","2162-2388"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Neural Networks and Learning Systems","raw_type":"journal-article"},{"id":"pmid:40378032","is_oa":false,"landing_page_url":"https://pubmed.ncbi.nlm.nih.gov/40378032","pdf_url":null,"source":{"id":"https://openalex.org/S4306525036","display_name":"PubMed","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1299303238","host_organization_name":"National Institutes of Health","host_organization_lineage":["https://openalex.org/I1299303238"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE transactions on neural networks and learning systems","raw_type":null}],"best_oa_location":null,"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/13","display_name":"Climate action","score":0.7400000095367432}],"awards":[{"id":"https://openalex.org/G4544386380","display_name":null,"funder_award_id":"20220001057001","funder_id":"https://openalex.org/F4320322857","funder_display_name":"Aeronautical Science Foundation of China"},{"id":"https://openalex.org/G8410442353","display_name":null,"funder_award_id":"20240001057002","funder_id":"https://openalex.org/F4320322857","funder_display_name":"Aeronautical Science Foundation of China"}],"funders":[{"id":"https://openalex.org/F4320322857","display_name":"Aeronautical Science Foundation of China","ror":"https://ror.org/02wq41p38"}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":34,"referenced_works":["https://openalex.org/W1969839206","https://openalex.org/W1976486181","https://openalex.org/W2005245444","https://openalex.org/W2012518714","https://openalex.org/W2014774895","https://openalex.org/W2085207353","https://openalex.org/W2100781213","https://openalex.org/W2102488278","https://openalex.org/W2557827245","https://openalex.org/W2890428648","https://openalex.org/W2908261578","https://openalex.org/W2963809389","https://openalex.org/W2984557458","https://openalex.org/W3000179003","https://openalex.org/W3035829937","https://openalex.org/W3040707741","https://openalex.org/W3043763914","https://openalex.org/W3090059281","https://openalex.org/W3093345403","https://openalex.org/W3142849873","https://openalex.org/W3205953818","https://openalex.org/W3207097712","https://openalex.org/W4206874491","https://openalex.org/W4213246061","https://openalex.org/W4280571816","https://openalex.org/W4285102241","https://openalex.org/W4306148701","https://openalex.org/W4312366589","https://openalex.org/W4321195868","https://openalex.org/W4321763414","https://openalex.org/W4328007223","https://openalex.org/W4386322145","https://openalex.org/W4390905475","https://openalex.org/W4403390202"],"related_works":["https://openalex.org/W4391375266","https://openalex.org/W2899084033","https://openalex.org/W2748952813","https://openalex.org/W4310083477","https://openalex.org/W2328553770","https://openalex.org/W2920061524","https://openalex.org/W1977959518","https://openalex.org/W2038908348","https://openalex.org/W2107890255","https://openalex.org/W2106552856"],"abstract_inverted_index":{"The":[0],"problem":[1],"of":[2,74,141,148],"multiagent":[3],"encirclement":[4,48,69,93,109],"with":[5],"multiobstacle":[6],"collision":[7],"avoidance":[8],"(EMOCA)":[9],"has":[10],"been":[11],"challenging":[12],"since":[13],"it":[14],"is":[15,114,150],"difficult":[16],"to":[17,61,87,102,116,127,135],"balance":[18],"the":[19,32,53,67,98,137,145,156],"tradeoff":[20],"between":[21],"surrounding":[22],"a":[23,37,72,107,123,166],"mobile":[24],"target":[25],"and":[26,56,97,120,139,144],"avoiding":[27],"obstacles":[28],"simultaneously.":[29],"To":[30],"address":[31],"EMOCA":[33],"problem,":[34],"we":[35,121],"proposed":[36],"novel":[38],"policy-guided":[39],"reinforcement":[40],"learning":[41,55,58],"(RL)":[42],"method,":[43,143],"namely,":[44],"multiregulator-assisted":[45],"RL":[46],"for":[47],"control":[49],"(MRA-RLEC)":[50],"which":[51],"leverages":[52],"jump-start":[54],"curriculum":[57],"(CL)":[59],"mechanism":[60],"enhance":[62],"training":[63,90],"efficiency.":[64],"MRA-RLEC":[65,149],"divides":[66],"complex":[68],"task":[70],"into":[71],"sequence":[73],"subtasks,":[75],"progressively":[76],"increasing":[77],"in":[78,155],"difficulty.":[79],"In":[80],"this":[81],"process,":[82],"multiple":[83],"regulators":[84],"are":[85,132],"utilized":[86],"adjust":[88],"various":[89],"aspects,":[91],"including":[92],"condition,":[94],"obstacle":[95],"avoidance,":[96],"transition":[99],"from":[100],"guide":[101],"learned":[103],"policy":[104],"execution.":[105],"Besides,":[106],"global":[108],"reward":[110,118],"decomposition":[111],"(GERD)":[112],"method":[113],"presented":[115],"alleviate":[117],"sparsity,":[119],"design":[122],"bidirectional":[124],"communication":[125],"protocol":[126],"reduce":[128],"communication.":[129],"Extensive":[130],"experiments":[131,153],"carried":[133],"out":[134],"showcase":[136],"robustness":[138],"superiority":[140],"our":[142],"practical":[146],"applicability":[147],"demonstrated":[151],"through":[152],"conducted":[154],"robot":[157],"operating":[158],"system":[159],"2":[160],"(ROS2)-based":[161],"simulation":[162],"platform,":[163],"Gazebo,":[164],"using":[165],"self-designed":[167],"omnidirectional":[168],"vehicle":[169],"model.":[170]},"counts_by_year":[{"year":2026,"cited_by_count":2},{"year":2025,"cited_by_count":1}],"updated_date":"2026-06-12T08:23:45.883708","created_date":"2025-10-10T00:00:00"}