{"id":"https://openalex.org/W4401567261","doi":"https://doi.org/10.1109/tnnls.2024.3437366","title":"Boosting Weak-to-Strong Agents in Multiagent Reinforcement Learning via Balanced PPO","display_name":"Boosting Weak-to-Strong Agents in Multiagent Reinforcement Learning via Balanced PPO","publication_year":2024,"publication_date":"2024-08-14","ids":{"openalex":"https://openalex.org/W4401567261","doi":"https://doi.org/10.1109/tnnls.2024.3437366","pmid":"https://pubmed.ncbi.nlm.nih.gov/39141463"},"language":"en","primary_location":{"id":"doi:10.1109/tnnls.2024.3437366","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tnnls.2024.3437366","pdf_url":null,"source":{"id":"https://openalex.org/S4210175523","display_name":"IEEE Transactions on Neural Networks and Learning Systems","issn_l":"2162-237X","issn":["2162-237X","2162-2388"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Neural Networks and Learning Systems","raw_type":"journal-article"},"type":"article","indexed_in":["crossref","pubmed"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5001147061","display_name":"Sili Huang","orcid":"https://orcid.org/0000-0001-5387-7904"},"institutions":[{"id":"https://openalex.org/I194450716","display_name":"Jilin University","ror":"https://ror.org/00js3aw79","country_code":"CN","type":"education","lineage":["https://openalex.org/I194450716"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Sili Huang","raw_affiliation_strings":["Key Laboratory of Symbolic Computation and Knowledge Engineer of Ministry of Education and the School of Artificial Intelligence, Jilin University, Changchun, China"],"affiliations":[{"raw_affiliation_string":"Key Laboratory of Symbolic Computation and Knowledge Engineer of Ministry of Education and the School of Artificial Intelligence, Jilin University, Changchun, China","institution_ids":["https://openalex.org/I194450716"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5108294333","display_name":"Hechang Chen","orcid":"https://orcid.org/0000-0001-7835-9556"},"institutions":[{"id":"https://openalex.org/I194450716","display_name":"Jilin University","ror":"https://ror.org/00js3aw79","country_code":"CN","type":"education","lineage":["https://openalex.org/I194450716"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Hechang Chen","raw_affiliation_strings":["School of Artificial Intelligence, Jilin University, Changchun, China"],"affiliations":[{"raw_affiliation_string":"School of Artificial Intelligence, Jilin University, Changchun, China","institution_ids":["https://openalex.org/I194450716"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5004540162","display_name":"Haiyin Piao","orcid":"https://orcid.org/0000-0002-8519-4750"},"institutions":[{"id":"https://openalex.org/I194450716","display_name":"Jilin University","ror":"https://ror.org/00js3aw79","country_code":"CN","type":"education","lineage":["https://openalex.org/I194450716"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Haiyin Piao","raw_affiliation_strings":["School of Artificial Intelligence, Jilin University, Changchun, China"],"affiliations":[{"raw_affiliation_string":"School of Artificial Intelligence, Jilin University, Changchun, China","institution_ids":["https://openalex.org/I194450716"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101241132","display_name":"Zhixiao Sun","orcid":"https://orcid.org/0000-0003-0018-2337"},"institutions":[{"id":"https://openalex.org/I17145004","display_name":"Northwestern Polytechnical University","ror":"https://ror.org/01y0j0j86","country_code":"CN","type":"education","lineage":["https://openalex.org/I17145004"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Zhixiao Sun","raw_affiliation_strings":["Unmanned System Research Institute, Northwestern Polytechnical University, Xi&#x2019;an, China"],"affiliations":[{"raw_affiliation_string":"Unmanned System Research Institute, Northwestern Polytechnical University, Xi&#x2019;an, China","institution_ids":["https://openalex.org/I17145004"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5029392006","display_name":"Yi Chang","orcid":"https://orcid.org/0000-0003-2697-8093"},"institutions":[{"id":"https://openalex.org/I194450716","display_name":"Jilin University","ror":"https://ror.org/00js3aw79","country_code":"CN","type":"education","lineage":["https://openalex.org/I194450716"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yi Chang","raw_affiliation_strings":["School of Artificial Intelligence, Jilin University, Changchun, China"],"affiliations":[{"raw_affiliation_string":"School of Artificial Intelligence, Jilin University, Changchun, China","institution_ids":["https://openalex.org/I194450716"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5015105117","display_name":"Lichao Sun","orcid":"https://orcid.org/0000-0003-1539-7939"},"institutions":[{"id":"https://openalex.org/I186143895","display_name":"Lehigh University","ror":"https://ror.org/012afjb06","country_code":"US","type":"education","lineage":["https://openalex.org/I186143895"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Lichao Sun","raw_affiliation_strings":["Department of Computer Science and Engineering, Lehigh University, Bethlehem, PA, USA"],"affiliations":[{"raw_affiliation_string":"Department of Computer Science and Engineering, Lehigh University, Bethlehem, PA, USA","institution_ids":["https://openalex.org/I186143895"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5028694889","display_name":"Bo Yang","orcid":"https://orcid.org/0000-0003-1927-8419"},"institutions":[{"id":"https://openalex.org/I194450716","display_name":"Jilin University","ror":"https://ror.org/00js3aw79","country_code":"CN","type":"education","lineage":["https://openalex.org/I194450716"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Bo Yang","raw_affiliation_strings":["Key Laboratory of Symbolic Computation and Knowledge Engineer of Ministry of Education and the School of Computer Science and Technology, Jilin University, Changchun, China"],"affiliations":[{"raw_affiliation_string":"Key Laboratory of Symbolic Computation and Knowledge Engineer of Ministry of Education and the School of Computer Science and Technology, Jilin University, Changchun, China","institution_ids":["https://openalex.org/I194450716"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":7,"corresponding_author_ids":["https://openalex.org/A5001147061"],"corresponding_institution_ids":["https://openalex.org/I194450716"],"apc_list":null,"apc_paid":null,"fwci":0.3636,"has_fulltext":false,"cited_by_count":1,"citation_normalized_percentile":{"value":0.65935961,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":97,"max":99},"biblio":{"volume":"36","issue":"5","first_page":"9136","last_page":"9149"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9485999941825867,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9485999941825867,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/boosting","display_name":"Boosting (machine learning)","score":0.7209179401397705},{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.7170970439910889},{"id":"https://openalex.org/keywords/reinforcement","display_name":"Reinforcement","score":0.6024807691574097},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.49435877799987793},{"id":"https://openalex.org/keywords/multi-agent-system","display_name":"Multi-agent system","score":0.42051059007644653},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.3608030676841736},{"id":"https://openalex.org/keywords/psychology","display_name":"Psychology","score":0.13347092270851135},{"id":"https://openalex.org/keywords/social-psychology","display_name":"Social psychology","score":0.06612503528594971}],"concepts":[{"id":"https://openalex.org/C46686674","wikidata":"https://www.wikidata.org/wiki/Q466303","display_name":"Boosting (machine learning)","level":2,"score":0.7209179401397705},{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.7170970439910889},{"id":"https://openalex.org/C67203356","wikidata":"https://www.wikidata.org/wiki/Q1321905","display_name":"Reinforcement","level":2,"score":0.6024807691574097},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.49435877799987793},{"id":"https://openalex.org/C41550386","wikidata":"https://www.wikidata.org/wiki/Q529909","display_name":"Multi-agent system","level":2,"score":0.42051059007644653},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.3608030676841736},{"id":"https://openalex.org/C15744967","wikidata":"https://www.wikidata.org/wiki/Q9418","display_name":"Psychology","level":0,"score":0.13347092270851135},{"id":"https://openalex.org/C77805123","wikidata":"https://www.wikidata.org/wiki/Q161272","display_name":"Social psychology","level":1,"score":0.06612503528594971}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1109/tnnls.2024.3437366","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tnnls.2024.3437366","pdf_url":null,"source":{"id":"https://openalex.org/S4210175523","display_name":"IEEE Transactions on Neural Networks and Learning Systems","issn_l":"2162-237X","issn":["2162-237X","2162-2388"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Neural Networks and Learning Systems","raw_type":"journal-article"},{"id":"pmid:39141463","is_oa":false,"landing_page_url":"https://pubmed.ncbi.nlm.nih.gov/39141463","pdf_url":null,"source":{"id":"https://openalex.org/S4306525036","display_name":"PubMed","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1299303238","host_organization_name":"National Institutes of Health","host_organization_lineage":["https://openalex.org/I1299303238"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE transactions on neural networks and learning systems","raw_type":null}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G1211044700","display_name":null,"funder_award_id":"U2341229","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G4857404650","display_name":null,"funder_award_id":"2021ZD0112500","funder_id":"https://openalex.org/F4320335777","funder_display_name":"National Key Research and Development Program of China"},{"id":"https://openalex.org/G524871825","display_name":null,"funder_award_id":"61976102","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G6099864209","display_name":null,"funder_award_id":"62206105","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G74648278","display_name":null,"funder_award_id":"62202200","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G7526664217","display_name":null,"funder_award_id":"62172185","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G8161903434","display_name":null,"funder_award_id":"U22A2098","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"},{"id":"https://openalex.org/F4320335777","display_name":"National Key Research and Development Program of China","ror":null}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":52,"referenced_works":["https://openalex.org/W2145339207","https://openalex.org/W2747213132","https://openalex.org/W2904246096","https://openalex.org/W2963523627","https://openalex.org/W2997502221","https://openalex.org/W3036329728","https://openalex.org/W3057801323","https://openalex.org/W3099689767","https://openalex.org/W3116531410","https://openalex.org/W3173211893","https://openalex.org/W3187550742","https://openalex.org/W3193590406","https://openalex.org/W4205501012","https://openalex.org/W4212814156","https://openalex.org/W4280586962","https://openalex.org/W4288049695","https://openalex.org/W4307230233","https://openalex.org/W4308365139","https://openalex.org/W4309367642","https://openalex.org/W4313291205","https://openalex.org/W4319165238","https://openalex.org/W6627932998","https://openalex.org/W6687681856","https://openalex.org/W6738796088","https://openalex.org/W6741002519","https://openalex.org/W6744562401","https://openalex.org/W6747473740","https://openalex.org/W6749304979","https://openalex.org/W6752380930","https://openalex.org/W6756287877","https://openalex.org/W6758763022","https://openalex.org/W6758846586","https://openalex.org/W6762491519","https://openalex.org/W6762492177","https://openalex.org/W6764943343","https://openalex.org/W6769091550","https://openalex.org/W6769462197","https://openalex.org/W6775529125","https://openalex.org/W6780178658","https://openalex.org/W6780396752","https://openalex.org/W6781750019","https://openalex.org/W6783196708","https://openalex.org/W6787618087","https://openalex.org/W6795908048","https://openalex.org/W6796861069","https://openalex.org/W6797584944","https://openalex.org/W6810677780","https://openalex.org/W6840380725","https://openalex.org/W6846490732","https://openalex.org/W6853065473","https://openalex.org/W6857963045","https://openalex.org/W6868778457"],"related_works":["https://openalex.org/W2125652721","https://openalex.org/W1540371141","https://openalex.org/W4231274751","https://openalex.org/W1549363203","https://openalex.org/W2154063878","https://openalex.org/W2556012038","https://openalex.org/W1489772951","https://openalex.org/W2920061524","https://openalex.org/W4310083477","https://openalex.org/W1538046993"],"abstract_inverted_index":{"Multiagent":[0],"policy":[1,68,78],"gradients":[2],"(MAPGs),":[3],"an":[4,54],"essential":[5],"branch":[6],"of":[7,31,43,76,95],"reinforcement":[8],"learning":[9,75,110],"(RL),":[10],"have":[11],"made":[12],"great":[13],"progress":[14],"in":[15,46,105,142],"both":[16],"industry":[17],"and":[18,49,108,138,144,153,171],"academia.":[19],"However,":[20],"existing":[21],"models":[22,164],"do":[23],"not":[24],"pay":[25],"attention":[26],"to":[27,72,100],"the":[28,36,41,61,74,82,93,102,106,135,149,158,167],"inadequate":[29],"training":[30,45,83,103,151],"individual":[32],"policies,":[33,97],"thus":[34],"limiting":[35],"overall":[37],"performance.":[38],"We":[39],"verify":[40],"existence":[42],"imbalanced":[44,150],"multiagent":[47],"tasks":[48],"formally":[50],"define":[51],"it":[52],"as":[53],"imbalance":[55],"between":[56],"policies":[57],"(IBPs).":[58],"To":[59,112],"address":[60],"IBP":[62],"issue,":[63],"we":[64,116],"propose":[65],"a":[66,118,125],"dynamic":[67],"balance":[69,73],"(DPB)":[70],"model":[71],"each":[77],"by":[79],"dynamically":[80],"reweighting":[81],"samples.":[84],"In":[85],"addition,":[86],"current":[87],"methods":[88,170],"for":[89,131],"better":[90],"performance":[91,175],"strengthen":[92],"exploration":[94,127,155],"all":[96],"which":[98],"leads":[99],"disregarding":[101],"differences":[104],"team":[107],"reducing":[109],"efficiency.":[111,156],"overcome":[113],"this":[114],"drawback,":[115],"derive":[117],"technique":[119],"named":[120],"weighted":[121],"entropy":[122],"regularization":[123],"(WER),":[124],"team-level":[126],"with":[128],"additional":[129],"incentives":[130],"individuals":[132],"who":[133],"exceed":[134],"team.":[136],"DPB":[137],"WER":[139],"are":[140],"evaluated":[141],"homogeneous":[143],"heterogeneous":[145],"tasks,":[146],"effectively":[147],"alleviating":[148],"problem":[152],"improving":[154],"Furthermore,":[157],"experimental":[159],"results":[160],"show":[161],"that":[162],"our":[163],"can":[165],"outperform":[166],"state-of-the-art":[168],"MAPG":[169],"boast":[172],"over":[173],"12.1%":[174],"gain":[176],"on":[177],"average.":[178]},"counts_by_year":[{"year":2026,"cited_by_count":1}],"updated_date":"2026-03-10T16:38:18.471706","created_date":"2025-10-10T00:00:00"}
