{"id":"https://openalex.org/W4401907136","doi":"https://doi.org/10.1109/icdl61372.2024.10644943","title":"Improving Proximal Policy Optimization Algorithm in Interactive Multi-Agent Systems","display_name":"Improving Proximal Policy Optimization Algorithm in Interactive Multi-Agent Systems","publication_year":2024,"publication_date":"2024-05-20","ids":{"openalex":"https://openalex.org/W4401907136","doi":"https://doi.org/10.1109/icdl61372.2024.10644943"},"language":"en","primary_location":{"id":"doi:10.1109/icdl61372.2024.10644943","is_oa":false,"landing_page_url":"http://dx.doi.org/10.1109/icdl61372.2024.10644943","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2024 IEEE International Conference on Development and Learning (ICDL)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://research.rug.nl/en/publications/b4a5c8f2-179e-4bb3-87e2-fb7f4a5fb93b","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5112541716","display_name":"Yi Shang","orcid":null},"institutions":[{"id":"https://openalex.org/I31746571","display_name":"UNSW Sydney","ror":"https://ror.org/03r8z3t63","country_code":"AU","type":"education","lineage":["https://openalex.org/I31746571"]}],"countries":["AU"],"is_corresponding":true,"raw_author_name":"Yi Shang","raw_affiliation_strings":["School of Computer Science and Engineering, University of New South Wales,Sydney,NSW,Australia"],"affiliations":[{"raw_affiliation_string":"School of Computer Science and Engineering, University of New South Wales,Sydney,NSW,Australia","institution_ids":["https://openalex.org/I31746571"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101401585","display_name":"Yifei Chen","orcid":"https://orcid.org/0000-0002-7702-2107"},"institutions":[{"id":"https://openalex.org/I169381384","display_name":"University of Groningen","ror":"https://ror.org/012p63287","country_code":"NL","type":"education","lineage":["https://openalex.org/I169381384"]}],"countries":["NL"],"is_corresponding":false,"raw_author_name":"Yifei Chen","raw_affiliation_strings":["Bernoulli Institute for Mathematics, Computer Science and Artificial Intelligence, University of Groningen,The Netherlands"],"affiliations":[{"raw_affiliation_string":"Bernoulli Institute for Mathematics, Computer Science and Artificial Intelligence, University of Groningen,The Netherlands","institution_ids":["https://openalex.org/I169381384"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5044098673","display_name":"Francisco Cruz","orcid":"https://orcid.org/0000-0002-1131-3382"},"institutions":[{"id":"https://openalex.org/I31746571","display_name":"UNSW Sydney","ror":"https://ror.org/03r8z3t63","country_code":"AU","type":"education","lineage":["https://openalex.org/I31746571"]}],"countries":["AU"],"is_corresponding":false,"raw_author_name":"Francisco Cruz","raw_affiliation_strings":["School of Computer Science and Engineering, University of New South Wales,Sydney,NSW,Australia"],"affiliations":[{"raw_affiliation_string":"School of Computer Science and Engineering, University of New South Wales,Sydney,NSW,Australia","institution_ids":["https://openalex.org/I31746571"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5112541716"],"corresponding_institution_ids":["https://openalex.org/I31746571"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.12027936,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"6"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9994999766349792,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9994999766349792,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10586","display_name":"Robotic Path Planning Algorithms","score":0.996399998664856,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12288","display_name":"Optimization and Search Problems","score":0.9884999990463257,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6801937818527222},{"id":"https://openalex.org/keywords/optimization-algorithm","display_name":"Optimization algorithm","score":0.4546315371990204},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.4324708580970764},{"id":"https://openalex.org/keywords/mathematical-optimization","display_name":"Mathematical optimization","score":0.3339160680770874},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.147051602602005}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6801937818527222},{"id":"https://openalex.org/C2987595161","wikidata":"https://www.wikidata.org/wiki/Q141495","display_name":"Optimization algorithm","level":2,"score":0.4546315371990204},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.4324708580970764},{"id":"https://openalex.org/C126255220","wikidata":"https://www.wikidata.org/wiki/Q141495","display_name":"Mathematical optimization","level":1,"score":0.3339160680770874},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.147051602602005}],"mesh":[],"locations_count":3,"locations":[{"id":"doi:10.1109/icdl61372.2024.10644943","is_oa":false,"landing_page_url":"http://dx.doi.org/10.1109/icdl61372.2024.10644943","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2024 IEEE International Conference on Development and Learning (ICDL)","raw_type":"proceedings-article"},{"id":"pmh:oai:pure.rug.nl:openaire_cris_publications/b4a5c8f2-179e-4bb3-87e2-fb7f4a5fb93b","is_oa":true,"landing_page_url":"https://research.rug.nl/en/publications/b4a5c8f2-179e-4bb3-87e2-fb7f4a5fb93b","pdf_url":null,"source":{"id":"https://openalex.org/S4306400420","display_name":"University of Groningen research database (University of Groningen / Centre for Information Technology)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I169381384","host_organization_name":"University of Groningen","host_organization_lineage":["https://openalex.org/I169381384"],"host_organization_lineage_names":[],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Shang, Y, Chen, Y & Cruz, F 2024, Improving Proximal Policy Optimization Algorithm in Interactive Multi-Agent Systems. in 2024 IEEE International Conference on Development and Learning, ICDL 2024. IEEE, 2024 IEEE International Conference on Development and Learning, ICDL 2024, Austin, United States, 20/05/2024. https://doi.org/10.1109/ICDL61372.2024.10644943","raw_type":"info:eu-repo/semantics/publishedVersion"},{"id":"pmh:oai:pure.rug.nl:openaire/b4a5c8f2-179e-4bb3-87e2-fb7f4a5fb93b","is_oa":true,"landing_page_url":"https://hdl.handle.net/11370/b4a5c8f2-179e-4bb3-87e2-fb7f4a5fb93b","pdf_url":null,"source":{"id":"https://openalex.org/S4306400420","display_name":"University of Groningen research database (University of Groningen / Centre for Information Technology)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I169381384","host_organization_name":"University of Groningen","host_organization_lineage":["https://openalex.org/I169381384"],"host_organization_lineage_names":[],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Shang, Y, Chen, Y & Cruz, F 2024, Improving Proximal Policy Optimization Algorithm in Interactive Multi-Agent Systems. in 2024 IEEE International Conference on Development and Learning, ICDL 2024. IEEE, 2024 IEEE International Conference on Development and Learning, ICDL 2024, Austin, United States, 20/05/2024. https://doi.org/10.1109/ICDL61372.2024.10644943","raw_type":"info:eu-repo/semantics/publishedVersion"}],"best_oa_location":{"id":"pmh:oai:pure.rug.nl:openaire_cris_publications/b4a5c8f2-179e-4bb3-87e2-fb7f4a5fb93b","is_oa":true,"landing_page_url":"https://research.rug.nl/en/publications/b4a5c8f2-179e-4bb3-87e2-fb7f4a5fb93b","pdf_url":null,"source":{"id":"https://openalex.org/S4306400420","display_name":"University of Groningen research database (University of Groningen / Centre for Information Technology)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I169381384","host_organization_name":"University of Groningen","host_organization_lineage":["https://openalex.org/I169381384"],"host_organization_lineage_names":[],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Shang, Y, Chen, Y & Cruz, F 2024, Improving Proximal Policy Optimization Algorithm in Interactive Multi-Agent Systems. in 2024 IEEE International Conference on Development and Learning, ICDL 2024. IEEE, 2024 IEEE International Conference on Development and Learning, ICDL 2024, Austin, United States, 20/05/2024. https://doi.org/10.1109/ICDL61372.2024.10644943","raw_type":"info:eu-repo/semantics/publishedVersion"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":14,"referenced_works":["https://openalex.org/W2736601468","https://openalex.org/W2763208138","https://openalex.org/W2768629321","https://openalex.org/W2781726626","https://openalex.org/W2787938642","https://openalex.org/W2795786572","https://openalex.org/W2973229164","https://openalex.org/W2975684468","https://openalex.org/W2982316857","https://openalex.org/W3030840723","https://openalex.org/W3212907084","https://openalex.org/W4214717370","https://openalex.org/W4382202976","https://openalex.org/W6741002519"],"related_works":["https://openalex.org/W4366549320","https://openalex.org/W4287863136","https://openalex.org/W3006015132","https://openalex.org/W2965078190","https://openalex.org/W2140656843","https://openalex.org/W3040644038","https://openalex.org/W4394870774","https://openalex.org/W4311207076","https://openalex.org/W3203633096","https://openalex.org/W2355226480"],"abstract_inverted_index":{"Proximal":[0],"Policy":[1],"Optimization":[2],"(PPO),":[3],"as":[4,38],"an":[5,39],"outstanding":[6],"Reinforcement":[7],"learning":[8,26],"(RL)":[9],"algorithm,":[10,41],"has":[11,29],"proven":[12],"its":[13],"efficiency":[14],"when":[15,116],"solving":[16],"a":[17,80,124],"wide":[18],"range":[19],"of":[20,32,47,73,101,140],"problems.":[21],"Compared":[22],"to":[23,67,88,110,136],"other":[24],"reinforcement":[25],"algorithms,":[27],"it":[28,42],"the":[30,45,70,74,84,93,99,107,112,118,137,141],"advantage":[31],"advanced":[33],"stability":[34],"and":[35,50,64,114],"reliability.":[36],"However,":[37],"on-policy":[40],"suffers":[43],"from":[44],"problem":[46],"sample":[48],"inefficiency":[49],"moderate":[51],"training":[52,71,138],"speed.":[53],"In":[54],"this":[55],"paper,":[56],"we":[57,78],"utilize":[58],"two":[59],"methods,":[60],"namely,":[61],"share":[62,65],"parameter":[63],"trajectory":[66],"speed":[68],"up":[69],"process":[72,139],"PPO":[75,143],"algorithm.":[76,144],"Moreover,":[77],"introduce":[79,98],"method":[81,109],"that":[82],"uses":[83],"adaptive":[85],"blending":[86],"concept":[87],"prevent":[89],"unnecessary":[90],"updates":[91],"during":[92],"parameter-sharing":[94],"process.":[95],"We":[96],"also":[97],"technique":[100],"possibility":[102],"for":[103],"selection,":[104],"along":[105],"with":[106],"thresholding":[108],"balance":[111],"exploitation":[113],"exploration":[115],"incorporating":[117],"trajectory-sharing":[119],"method.":[120],"Tests":[121],"performed":[122],"under":[123],"multi-agent":[125],"environment":[126],"setup":[127],"show":[128],"both":[129],"methods":[130],"converge":[131],"significantly":[132],"faster":[133],"in":[134],"comparison":[135],"traditional":[142]},"counts_by_year":[],"updated_date":"2026-03-04T09:10:02.777135","created_date":"2025-10-10T00:00:00"}