{"id":"https://openalex.org/W7125945609","doi":"https://doi.org/10.1109/smc58881.2025.11342540","title":"Adaptive Multi-Layer Prioritized Fictitious Self-play in Multi-Agent Reinforcement Learning","display_name":"Adaptive Multi-Layer Prioritized Fictitious Self-play in Multi-Agent Reinforcement Learning","publication_year":2025,"publication_date":"2025-10-05","ids":{"openalex":"https://openalex.org/W7125945609","doi":"https://doi.org/10.1109/smc58881.2025.11342540"},"language":null,"primary_location":{"id":"doi:10.1109/smc58881.2025.11342540","is_oa":false,"landing_page_url":"https://doi.org/10.1109/smc58881.2025.11342540","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 IEEE International Conference on Systems, Man, and Cybernetics (SMC)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5109778682","display_name":"Chao Yue","orcid":null},"institutions":[{"id":"https://openalex.org/I4210165038","display_name":"University of Chinese Academy of Sciences","ror":"https://ror.org/05qbk4x57","country_code":"CN","type":"education","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210165038"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Chao Yue","raw_affiliation_strings":["University of Chinese Academy of Sciences,School of Engineering Science,Beijing,China,100049"],"affiliations":[{"raw_affiliation_string":"University of Chinese Academy of Sciences,School of Engineering Science,Beijing,China,100049","institution_ids":["https://openalex.org/I4210165038"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5124140501","display_name":"Jian Xue","orcid":null},"institutions":[{"id":"https://openalex.org/I4210165038","display_name":"University of Chinese Academy of Sciences","ror":"https://ror.org/05qbk4x57","country_code":"CN","type":"education","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210165038"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jian Xue","raw_affiliation_strings":["University of Chinese Academy of Sciences,School of Engineering Science,Beijing,China,100049"],"affiliations":[{"raw_affiliation_string":"University of Chinese Academy of Sciences,School of Engineering Science,Beijing,China,100049","institution_ids":["https://openalex.org/I4210165038"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5124111082","display_name":"Lin Zhao","orcid":null},"institutions":[{"id":"https://openalex.org/I4210165038","display_name":"University of Chinese Academy of Sciences","ror":"https://ror.org/05qbk4x57","country_code":"CN","type":"education","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210165038"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Lin Zhao","raw_affiliation_strings":["University of Chinese Academy of Sciences,School of Engineering Science,Beijing,China,100049"],"affiliations":[{"raw_affiliation_string":"University of Chinese Academy of Sciences,School of Engineering Science,Beijing,China,100049","institution_ids":["https://openalex.org/I4210165038"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5033030877","display_name":"Lei Yuan","orcid":"https://orcid.org/0000-0002-2293-0626"},"institutions":[{"id":"https://openalex.org/I4210165038","display_name":"University of Chinese Academy of Sciences","ror":"https://ror.org/05qbk4x57","country_code":"CN","type":"education","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210165038"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yuan Lei","raw_affiliation_strings":["University of Chinese Academy of Sciences,School of Engineering Science,Beijing,China,100049"],"affiliations":[{"raw_affiliation_string":"University of Chinese Academy of Sciences,School of Engineering Science,Beijing,China,100049","institution_ids":["https://openalex.org/I4210165038"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5124080344","display_name":"Shun Mao","orcid":null},"institutions":[{"id":"https://openalex.org/I4210165038","display_name":"University of Chinese Academy of Sciences","ror":"https://ror.org/05qbk4x57","country_code":"CN","type":"education","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210165038"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Shun Mao","raw_affiliation_strings":["University of Chinese Academy of Sciences,School of Engineering Science,Beijing,China,100049"],"affiliations":[{"raw_affiliation_string":"University of Chinese Academy of Sciences,School of Engineering Science,Beijing,China,100049","institution_ids":["https://openalex.org/I4210165038"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5079281855","display_name":"Ke L\u00fc","orcid":"https://orcid.org/0000-0002-3456-4993"},"institutions":[{"id":"https://openalex.org/I4210165038","display_name":"University of Chinese Academy of Sciences","ror":"https://ror.org/05qbk4x57","country_code":"CN","type":"education","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210165038"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Ke Lu","raw_affiliation_strings":["University of Chinese Academy of Sciences,School of Engineering Science,Beijing,China,100049"],"affiliations":[{"raw_affiliation_string":"University of Chinese Academy of Sciences,School of Engineering Science,Beijing,China,100049","institution_ids":["https://openalex.org/I4210165038"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5109778682"],"corresponding_institution_ids":["https://openalex.org/I4210165038"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.87220912,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"4069","last_page":"4075"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.871999979019165,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.871999979019165,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11574","display_name":"Artificial Intelligence in Games","score":0.023000000044703484,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12794","display_name":"Adaptive Dynamic Programming Control","score":0.019200000911951065,"subfield":{"id":"https://openalex.org/subfields/1703","display_name":"Computational Theory and Mathematics"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.7555999755859375},{"id":"https://openalex.org/keywords/convergence","display_name":"Convergence (economics)","score":0.5873000025749207},{"id":"https://openalex.org/keywords/key","display_name":"Key (lock)","score":0.5091999769210815},{"id":"https://openalex.org/keywords/selection","display_name":"Selection (genetic algorithm)","score":0.5002999901771545},{"id":"https://openalex.org/keywords/field","display_name":"Field (mathematics)","score":0.43880000710487366},{"id":"https://openalex.org/keywords/stochastic-game","display_name":"Stochastic game","score":0.400299996137619},{"id":"https://openalex.org/keywords/bilevel-optimization","display_name":"Bilevel optimization","score":0.35030001401901245}],"concepts":[{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.7555999755859375},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7246000170707703},{"id":"https://openalex.org/C2777303404","wikidata":"https://www.wikidata.org/wiki/Q759757","display_name":"Convergence (economics)","level":2,"score":0.5873000025749207},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.5091999769210815},{"id":"https://openalex.org/C81917197","wikidata":"https://www.wikidata.org/wiki/Q628760","display_name":"Selection (genetic algorithm)","level":2,"score":0.5002999901771545},{"id":"https://openalex.org/C9652623","wikidata":"https://www.wikidata.org/wiki/Q190109","display_name":"Field (mathematics)","level":2,"score":0.43880000710487366},{"id":"https://openalex.org/C126255220","wikidata":"https://www.wikidata.org/wiki/Q141495","display_name":"Mathematical optimization","level":1,"score":0.4275999963283539},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4153999984264374},{"id":"https://openalex.org/C22171661","wikidata":"https://www.wikidata.org/wiki/Q1074380","display_name":"Stochastic game","level":2,"score":0.400299996137619},{"id":"https://openalex.org/C3309286","wikidata":"https://www.wikidata.org/wiki/Q4907693","display_name":"Bilevel optimization","level":3,"score":0.35030001401901245},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.33149999380111694},{"id":"https://openalex.org/C167393769","wikidata":"https://www.wikidata.org/wiki/Q4680768","display_name":"Adaptive strategies","level":2,"score":0.2964000105857849},{"id":"https://openalex.org/C52970973","wikidata":"https://www.wikidata.org/wiki/Q2497134","display_name":"Adaptive system","level":2,"score":0.29409998655319214},{"id":"https://openalex.org/C2778444522","wikidata":"https://www.wikidata.org/wiki/Q1081491","display_name":"Football","level":2,"score":0.28870001435279846},{"id":"https://openalex.org/C2777211547","wikidata":"https://www.wikidata.org/wiki/Q17141490","display_name":"Training (meteorology)","level":2,"score":0.28189998865127563},{"id":"https://openalex.org/C2781395549","wikidata":"https://www.wikidata.org/wiki/Q4680762","display_name":"Adaptive sampling","level":3,"score":0.2694999873638153},{"id":"https://openalex.org/C137836250","wikidata":"https://www.wikidata.org/wiki/Q984063","display_name":"Optimization problem","level":2,"score":0.257099986076355},{"id":"https://openalex.org/C139807058","wikidata":"https://www.wikidata.org/wiki/Q352374","display_name":"Adaptation (eye)","level":2,"score":0.25450000166893005}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/smc58881.2025.11342540","is_oa":false,"landing_page_url":"https://doi.org/10.1109/smc58881.2025.11342540","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 IEEE International Conference on Systems, Man, and Cybernetics (SMC)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":12,"referenced_works":["https://openalex.org/W1530302171","https://openalex.org/W2028798910","https://openalex.org/W2044121814","https://openalex.org/W2097160297","https://openalex.org/W2145339207","https://openalex.org/W2791379113","https://openalex.org/W2963390429","https://openalex.org/W2997502221","https://openalex.org/W4390814006","https://openalex.org/W4415428471","https://openalex.org/W7124145361","https://openalex.org/W7124290073"],"related_works":[],"abstract_inverted_index":{"In":[0],"the":[1,32,80,86,93,97,122,138,143,156,161],"field":[2],"of":[3,36,53,88,96,124,148,160],"Multi-Agent":[4],"Reinforcement":[5],"Learning":[6],"(MARL),":[7],"strategy":[8,89,112,131],"selection":[9],"and":[10,31,59,103,146,158,169],"optimization":[11,90],"are":[12],"key":[13],"challenges":[14],"in":[15,29,45,72,99,127,150,155,171],"improving":[16],"agent":[17],"performance.":[18],"The":[19,133],"Policy":[20],"Space":[21],"Response":[22],"Oracles":[23],"(PSRO)":[24],"algorithm":[25],"is":[26,34,70],"widely":[27],"used":[28],"MARL,":[30],"meta-solver":[33,77],"one":[35],"its":[37,167],"cores.":[38],"However,":[39],"existing":[40],"meta-solvers":[41,126],"may":[42],"exhibit":[43],"limitations":[44],"complex":[46,130,151],"MARL":[47,152],"environments,":[48,153],"such":[49],"as":[50,75],"significant":[51],"consumption":[52],"computing":[54],"resources,":[55],"poor":[56],"convergence,":[57],"instability,":[58],"so":[60],"on.":[61],"Therefore,":[62],"an":[63],"adaptive":[64],"multi-layer":[65,107],"Prioritized":[66],"Fictitious":[67],"Self-Play":[68],"(PFSP)":[69],"proposed":[71,139],"this":[73],"paper":[74],"a":[76,100],"method":[78,140],"for":[79],"PSRO":[81],"algorithm,":[82],"which":[83],"further":[84],"improves":[85,142],"effectiveness":[87],"by":[91],"utilizing":[92],"game":[94],"results":[95,135],"meta-game":[98],"more":[101],"efficient":[102],"reasonable":[104],"way.":[105],"Adaptive":[106],"PFSP":[108],"can":[109],"flexibly":[110],"make":[111],"choices":[113],"based":[114],"on":[115],"payoff":[116],"at":[117],"different":[118],"layers,":[119],"thus":[120],"overcoming":[121],"shortcomings":[123],"traditional":[125],"dealing":[128],"with":[129],"spaces.":[132],"experimental":[134],"show":[136],"that":[137],"significantly":[141],"convergence":[144],"speed":[145],"performance":[147],"strategies":[149],"especially":[154],"training":[157],"testing":[159],"Google":[162],"Research":[163],"Football":[164],"environment,":[165],"demonstrating":[166],"potential":[168],"advantages":[170],"practical":[172],"applications.":[173]},"counts_by_year":[],"updated_date":"2026-01-29T23:17:01.242718","created_date":"2026-01-29T00:00:00"}
