{"id":"https://openalex.org/W4388240237","doi":"https://doi.org/10.1109/tnnls.2023.3326867","title":"Safe Adaptive Policy Transfer Reinforcement Learning for Distributed Multiagent Control","display_name":"Safe Adaptive Policy Transfer Reinforcement Learning for Distributed Multiagent Control","publication_year":2023,"publication_date":"2023-11-02","ids":{"openalex":"https://openalex.org/W4388240237","doi":"https://doi.org/10.1109/tnnls.2023.3326867","pmid":"https://pubmed.ncbi.nlm.nih.gov/37917524"},"language":"en","primary_location":{"id":"doi:10.1109/tnnls.2023.3326867","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tnnls.2023.3326867","pdf_url":null,"source":{"id":"https://openalex.org/S4210175523","display_name":"IEEE Transactions on Neural Networks and Learning Systems","issn_l":"2162-237X","issn":["2162-237X","2162-2388"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Neural Networks and Learning Systems","raw_type":"journal-article"},"type":"article","indexed_in":["crossref","pubmed"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5068667180","display_name":"Bin Du","orcid":"https://orcid.org/0000-0002-5308-3546"},"institutions":[{"id":"https://openalex.org/I183067930","display_name":"Shanghai Jiao Tong University","ror":"https://ror.org/0220qvk04","country_code":"CN","type":"education","lineage":["https://openalex.org/I183067930"]},{"id":"https://openalex.org/I17145004","display_name":"Northwestern Polytechnical University","ror":"https://ror.org/01y0j0j86","country_code":"CN","type":"education","lineage":["https://openalex.org/I17145004"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Bin Du","raw_affiliation_strings":["Ocean Institute, Northwestern Polytechnical University, Taicang, China","Department of Automation, Shanghai Jiao Tong University, Shanghai, China"],"affiliations":[{"raw_affiliation_string":"Ocean Institute, Northwestern Polytechnical University, Taicang, China","institution_ids":["https://openalex.org/I17145004"]},{"raw_affiliation_string":"Department of Automation, Shanghai Jiao Tong University, Shanghai, China","institution_ids":["https://openalex.org/I183067930"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5014747872","display_name":"Wei Xie","orcid":"https://orcid.org/0000-0003-4984-6659"},"institutions":[{"id":"https://openalex.org/I183067930","display_name":"Shanghai Jiao Tong University","ror":"https://ror.org/0220qvk04","country_code":"CN","type":"education","lineage":["https://openalex.org/I183067930"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Wei Xie","raw_affiliation_strings":["Department of Automation, Shanghai Jiao Tong University, Shanghai, China"],"affiliations":[{"raw_affiliation_string":"Department of Automation, Shanghai Jiao Tong University, Shanghai, China","institution_ids":["https://openalex.org/I183067930"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100421776","display_name":"Yang Li","orcid":"https://orcid.org/0000-0003-3109-0953"},"institutions":[{"id":"https://openalex.org/I16609230","display_name":"Hunan University","ror":"https://ror.org/05htk5m33","country_code":"CN","type":"education","lineage":["https://openalex.org/I16609230"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yang Li","raw_affiliation_strings":["College of Mechanical and Vehicle Engineering, Hunan University, Changsha, China"],"affiliations":[{"raw_affiliation_string":"College of Mechanical and Vehicle Engineering, Hunan University, Changsha, China","institution_ids":["https://openalex.org/I16609230"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5070497551","display_name":"Qisong Yang","orcid":"https://orcid.org/0000-0002-9686-2697"},"institutions":[{"id":"https://openalex.org/I4210130660","display_name":"Xi'an High Tech University","ror":"https://ror.org/03vt7za95","country_code":"CN","type":"education","lineage":["https://openalex.org/I4210130660"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Qisong Yang","raw_affiliation_strings":["Xi&#x2019;an Institute of High-Tech, Xi&#x2019;an, China"],"affiliations":[{"raw_affiliation_string":"Xi&#x2019;an Institute of High-Tech, Xi&#x2019;an, China","institution_ids":["https://openalex.org/I4210130660"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100414775","display_name":"Weidong Zhang","orcid":"https://orcid.org/0000-0002-4700-1276"},"institutions":[{"id":"https://openalex.org/I183067930","display_name":"Shanghai Jiao Tong University","ror":"https://ror.org/0220qvk04","country_code":"CN","type":"education","lineage":["https://openalex.org/I183067930"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Weidong Zhang","raw_affiliation_strings":["Department of Automation, Shanghai Jiao Tong University, Shanghai, China"],"affiliations":[{"raw_affiliation_string":"Department of Automation, Shanghai Jiao Tong University, Shanghai, China","institution_ids":["https://openalex.org/I183067930"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5068318450","display_name":"Rudy R. Negenborn","orcid":"https://orcid.org/0000-0001-9784-1225"},"institutions":[{"id":"https://openalex.org/I98358874","display_name":"Delft University of Technology","ror":"https://ror.org/02e2c7k09","country_code":"NL","type":"education","lineage":["https://openalex.org/I98358874"]}],"countries":["NL"],"is_corresponding":false,"raw_author_name":"Rudy R. Negenborn","raw_affiliation_strings":["Department of Maritime and Transport Technology, Delft University of Technology, Delft, The Netherlands"],"affiliations":[{"raw_affiliation_string":"Department of Maritime and Transport Technology, Delft University of Technology, Delft, The Netherlands","institution_ids":["https://openalex.org/I98358874"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5050050751","display_name":"Yusong Pang","orcid":"https://orcid.org/0000-0001-8094-3436"},"institutions":[{"id":"https://openalex.org/I98358874","display_name":"Delft University of Technology","ror":"https://ror.org/02e2c7k09","country_code":"NL","type":"education","lineage":["https://openalex.org/I98358874"]}],"countries":["NL"],"is_corresponding":false,"raw_author_name":"Yusong Pang","raw_affiliation_strings":["Department of Maritime and Transport Technology, Delft University of Technology, Delft, The Netherlands"],"affiliations":[{"raw_affiliation_string":"Department of Maritime and Transport Technology, Delft University of Technology, Delft, The Netherlands","institution_ids":["https://openalex.org/I98358874"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5019419996","display_name":"Hongtian Chen","orcid":"https://orcid.org/0000-0002-8600-9668"},"institutions":[{"id":"https://openalex.org/I183067930","display_name":"Shanghai Jiao Tong University","ror":"https://ror.org/0220qvk04","country_code":"CN","type":"education","lineage":["https://openalex.org/I183067930"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Hongtian Chen","raw_affiliation_strings":["Department of Automation, Shanghai Jiao Tong University, Shanghai, China"],"affiliations":[{"raw_affiliation_string":"Department of Automation, Shanghai Jiao Tong University, Shanghai, China","institution_ids":["https://openalex.org/I183067930"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":8,"corresponding_author_ids":["https://openalex.org/A5068667180"],"corresponding_institution_ids":["https://openalex.org/I17145004","https://openalex.org/I183067930"],"apc_list":null,"apc_paid":null,"fwci":1.872,"has_fulltext":false,"cited_by_count":14,"citation_normalized_percentile":{"value":0.86042467,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":97,"max":99},"biblio":{"volume":"36","issue":"1","first_page":"1939","last_page":"1946"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10409","display_name":"Fuel Cells and Related Materials","score":0.9854000210762024,"subfield":{"id":"https://openalex.org/subfields/2208","display_name":"Electrical and Electronic Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10409","display_name":"Fuel Cells and Related Materials","score":0.9854000210762024,"subfield":{"id":"https://openalex.org/subfields/2208","display_name":"Electrical and Electronic Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12676","display_name":"Machine Learning and ELM","score":0.9472000002861023,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.940500020980835,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.8259927034378052},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7046676874160767},{"id":"https://openalex.org/keywords/control","display_name":"Control (management)","score":0.5798172950744629},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.5486330986022949},{"id":"https://openalex.org/keywords/policy-transfer","display_name":"Policy transfer","score":0.5412757396697998},{"id":"https://openalex.org/keywords/transfer-of-learning","display_name":"Transfer of learning","score":0.5235074162483215},{"id":"https://openalex.org/keywords/process","display_name":"Process (computing)","score":0.5145732760429382},{"id":"https://openalex.org/keywords/representation","display_name":"Representation (politics)","score":0.5019450187683105},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.4982273578643799},{"id":"https://openalex.org/keywords/multi-agent-system","display_name":"Multi-agent system","score":0.47263023257255554},{"id":"https://openalex.org/keywords/knowledge-transfer","display_name":"Knowledge transfer","score":0.4243502616882324},{"id":"https://openalex.org/keywords/knowledge-management","display_name":"Knowledge management","score":0.16610780358314514},{"id":"https://openalex.org/keywords/engineering","display_name":"Engineering","score":0.12801101803779602}],"concepts":[{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.8259927034378052},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7046676874160767},{"id":"https://openalex.org/C2775924081","wikidata":"https://www.wikidata.org/wiki/Q55608371","display_name":"Control (management)","level":2,"score":0.5798172950744629},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.5486330986022949},{"id":"https://openalex.org/C2776731479","wikidata":"https://www.wikidata.org/wiki/Q15142682","display_name":"Policy transfer","level":2,"score":0.5412757396697998},{"id":"https://openalex.org/C150899416","wikidata":"https://www.wikidata.org/wiki/Q1820378","display_name":"Transfer of learning","level":2,"score":0.5235074162483215},{"id":"https://openalex.org/C98045186","wikidata":"https://www.wikidata.org/wiki/Q205663","display_name":"Process (computing)","level":2,"score":0.5145732760429382},{"id":"https://openalex.org/C2776359362","wikidata":"https://www.wikidata.org/wiki/Q2145286","display_name":"Representation (politics)","level":3,"score":0.5019450187683105},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4982273578643799},{"id":"https://openalex.org/C41550386","wikidata":"https://www.wikidata.org/wiki/Q529909","display_name":"Multi-agent system","level":2,"score":0.47263023257255554},{"id":"https://openalex.org/C2776960227","wikidata":"https://www.wikidata.org/wiki/Q2586354","display_name":"Knowledge transfer","level":2,"score":0.4243502616882324},{"id":"https://openalex.org/C56739046","wikidata":"https://www.wikidata.org/wiki/Q192060","display_name":"Knowledge management","level":1,"score":0.16610780358314514},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.12801101803779602},{"id":"https://openalex.org/C201995342","wikidata":"https://www.wikidata.org/wiki/Q682496","display_name":"Systems engineering","level":1,"score":0.0},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0},{"id":"https://openalex.org/C94625758","wikidata":"https://www.wikidata.org/wiki/Q7163","display_name":"Politics","level":2,"score":0.0},{"id":"https://openalex.org/C3116431","wikidata":"https://www.wikidata.org/wiki/Q31728","display_name":"Public administration","level":1,"score":0.0},{"id":"https://openalex.org/C199539241","wikidata":"https://www.wikidata.org/wiki/Q7748","display_name":"Law","level":1,"score":0.0},{"id":"https://openalex.org/C17744445","wikidata":"https://www.wikidata.org/wiki/Q36442","display_name":"Political science","level":0,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1109/tnnls.2023.3326867","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tnnls.2023.3326867","pdf_url":null,"source":{"id":"https://openalex.org/S4210175523","display_name":"IEEE Transactions on Neural Networks and Learning Systems","issn_l":"2162-237X","issn":["2162-237X","2162-2388"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Neural Networks and Learning Systems","raw_type":"journal-article"},{"id":"pmid:37917524","is_oa":false,"landing_page_url":"https://pubmed.ncbi.nlm.nih.gov/37917524","pdf_url":null,"source":{"id":"https://openalex.org/S4306525036","display_name":"PubMed","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1299303238","host_organization_name":"National Institutes of Health","host_organization_lineage":["https://openalex.org/I1299303238"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE transactions on neural networks and learning systems","raw_type":null}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G1852921944","display_name":null,"funder_award_id":"2022ZD0119903","funder_id":"https://openalex.org/F4320335777","funder_display_name":"National Key Research and Development Program of China"},{"id":"https://openalex.org/G5721963804","display_name":null,"funder_award_id":"U2141234","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G8352049440","display_name":null,"funder_award_id":"202106230194","funder_id":"https://openalex.org/F4320322725","funder_display_name":"China Scholarship Council"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"},{"id":"https://openalex.org/F4320322725","display_name":"China Scholarship Council","ror":"https://ror.org/04atp4p48"},{"id":"https://openalex.org/F4320335777","display_name":"National Key Research and Development Program of China","ror":null}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":32,"referenced_works":["https://openalex.org/W1533597678","https://openalex.org/W1821462560","https://openalex.org/W2904246096","https://openalex.org/W2982316857","https://openalex.org/W3006344226","https://openalex.org/W3011286351","https://openalex.org/W3173294282","https://openalex.org/W3179972165","https://openalex.org/W3182751084","https://openalex.org/W3197671932","https://openalex.org/W3198015624","https://openalex.org/W3198359393","https://openalex.org/W3208890675","https://openalex.org/W4226257065","https://openalex.org/W4237591687","https://openalex.org/W4283396661","https://openalex.org/W4304480072","https://openalex.org/W4383108600","https://openalex.org/W6698463750","https://openalex.org/W6732837357","https://openalex.org/W6737893269","https://openalex.org/W6738796088","https://openalex.org/W6758924645","https://openalex.org/W6762358693","https://openalex.org/W6779812412","https://openalex.org/W6780587392","https://openalex.org/W6802251749","https://openalex.org/W6802732210","https://openalex.org/W6804456054","https://openalex.org/W6810667139","https://openalex.org/W6845949628","https://openalex.org/W6922480057"],"related_works":["https://openalex.org/W4306904969","https://openalex.org/W2136522105","https://openalex.org/W2138720691","https://openalex.org/W4362501864","https://openalex.org/W4380318855","https://openalex.org/W3084456289","https://openalex.org/W2024136090","https://openalex.org/W4391331176","https://openalex.org/W2031695474","https://openalex.org/W2586732548"],"abstract_inverted_index":{"Multiagent":[0],"reinforcement":[1],"learning":[2,48,98,161],"(RL)":[3],"training":[4,22],"is":[5,51],"usually":[6],"difficult":[7,21],"and":[8,43,59,77,103,118,138,163],"time-consuming":[9],"due":[10],"to":[11,53,106],"mutual":[12],"interference":[13],"among":[14],"agents.":[15],"Safety":[16],"concerns":[17],"make":[18],"an":[19],"already":[20],"process":[23],"even":[24],"harder.":[25],"This":[26],"study":[27],"proposes":[28],"a":[29,41,62,135],"safe":[30],"adaptive":[31],"policy":[32,46,75,84,112],"transfer":[33,47,72],"RL":[34],"approach":[35,70],"for":[36],"multiagent":[37],"cooperative":[38],"control.":[39],"Specifically,":[40],"pioneer":[42,65,83,117],"follower":[44,55],"off-policy":[45,87],"(PFOPT)":[49],"method":[50,93,132,152],"presented":[52],"help":[54],"agents":[56,127],"acquire":[57,139],"knowledge":[58],"experience":[60,79,102],"from":[61],"single":[63],"well-trained":[64],"agent.":[66],"Notably,":[67],"the":[68,74,82,86,91,97,107,111,116,119,125,130,140,145,150],"designed":[69],"can":[71,94,133,153],"both":[73],"representation":[76],"sample":[78],"provided":[80],"by":[81,129],"in":[85,158],"learning.":[88],"More":[89],"importantly,":[90],"proposed":[92,131,151],"adaptively":[95],"adjust":[96],"weight":[99],"of":[100,115,147,160],"prior":[101],"exploration":[104],"according":[105],"Wasserstein":[108],"distance":[109],"between":[110],"probability":[113],"distributions":[114],"follower.":[120],"Case":[121],"studies":[122],"show":[123],"that":[124],"distributed":[126],"trained":[128],"complete":[134],"collaborative":[136],"task":[137],"maximum":[141],"rewards":[142],"while":[143],"minimizing":[144],"violation":[146],"constraints.":[148],"Moreover,":[149],"also":[154],"achieve":[155],"satisfactory":[156],"performance":[157],"terms":[159],"speed":[162],"success":[164],"rate.":[165]},"counts_by_year":[{"year":2025,"cited_by_count":10},{"year":2024,"cited_by_count":4}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
