{"id":"https://openalex.org/W3007689182","doi":"https://doi.org/10.1109/ssci44817.2019.9003120","title":"Multi-Agent Reinforcement Learning Based on Clustering in Two-Player Games","display_name":"Multi-Agent Reinforcement Learning Based on Clustering in Two-Player Games","publication_year":2019,"publication_date":"2019-12-01","ids":{"openalex":"https://openalex.org/W3007689182","doi":"https://doi.org/10.1109/ssci44817.2019.9003120","mag":"3007689182"},"language":"en","primary_location":{"id":"doi:10.1109/ssci44817.2019.9003120","is_oa":false,"landing_page_url":"https://doi.org/10.1109/ssci44817.2019.9003120","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2019 IEEE Symposium Series on Computational Intelligence (SSCI)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5006442353","display_name":"Weifan Li","orcid":"https://orcid.org/0000-0003-1871-8355"},"institutions":[{"id":"https://openalex.org/I4210094879","display_name":"Shandong Institute of Automation","ror":"https://ror.org/00qdtba35","country_code":"CN","type":"facility","lineage":["https://openalex.org/I4210094879","https://openalex.org/I4210142748"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Weifan Li","raw_affiliation_strings":["State Key Laboratory of Management and Control for Complex Systems, Institute of Automation, Beijing, China"],"affiliations":[{"raw_affiliation_string":"State Key Laboratory of Management and Control for Complex Systems, Institute of Automation, Beijing, China","institution_ids":["https://openalex.org/I4210094879"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5035107327","display_name":"Yuanheng Zhuand","orcid":null},"institutions":[{"id":"https://openalex.org/I4210094879","display_name":"Shandong Institute of Automation","ror":"https://ror.org/00qdtba35","country_code":"CN","type":"facility","lineage":["https://openalex.org/I4210094879","https://openalex.org/I4210142748"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yuanheng Zhuand","raw_affiliation_strings":["State Key Laboratory of Management and Control for Complex Systems, Institute of Automation, Beijing, China"],"affiliations":[{"raw_affiliation_string":"State Key Laboratory of Management and Control for Complex Systems, Institute of Automation, Beijing, China","institution_ids":["https://openalex.org/I4210094879"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5100624298","display_name":"Dongbin Zhao","orcid":"https://orcid.org/0000-0001-8218-9633"},"institutions":[{"id":"https://openalex.org/I4210094879","display_name":"Shandong Institute of Automation","ror":"https://ror.org/00qdtba35","country_code":"CN","type":"facility","lineage":["https://openalex.org/I4210094879","https://openalex.org/I4210142748"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Dongbin Zhao","raw_affiliation_strings":["State Key Laboratory of Management and Control for Complex Systems, Institute of Automation, Beijing, China"],"affiliations":[{"raw_affiliation_string":"State Key Laboratory of Management and Control for Complex Systems, Institute of Automation, Beijing, China","institution_ids":["https://openalex.org/I4210094879"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5006442353"],"corresponding_institution_ids":["https://openalex.org/I4210094879"],"apc_list":null,"apc_paid":null,"fwci":0.42,"has_fulltext":false,"cited_by_count":4,"citation_normalized_percentile":{"value":0.72751647,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":96},"biblio":{"volume":"96","issue":null,"first_page":"57","last_page":"63"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9991999864578247,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9991999864578247,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12794","display_name":"Adaptive Dynamic Programming Control","score":0.9901999831199646,"subfield":{"id":"https://openalex.org/subfields/1703","display_name":"Computational Theory and Mathematics"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11574","display_name":"Artificial Intelligence in Games","score":0.9728000164031982,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.8685100078582764},{"id":"https://openalex.org/keywords/cluster-analysis","display_name":"Cluster analysis","score":0.8046002984046936},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7207202911376953},{"id":"https://openalex.org/keywords/unsupervised-learning","display_name":"Unsupervised learning","score":0.5916250944137573},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5253015160560608},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.40592241287231445}],"concepts":[{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.8685100078582764},{"id":"https://openalex.org/C73555534","wikidata":"https://www.wikidata.org/wiki/Q622825","display_name":"Cluster analysis","level":2,"score":0.8046002984046936},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7207202911376953},{"id":"https://openalex.org/C8038995","wikidata":"https://www.wikidata.org/wiki/Q1152135","display_name":"Unsupervised learning","level":2,"score":0.5916250944137573},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5253015160560608},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.40592241287231445}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/ssci44817.2019.9003120","is_oa":false,"landing_page_url":"https://doi.org/10.1109/ssci44817.2019.9003120","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2019 IEEE Symposium Series on Computational Intelligence (SSCI)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":62,"referenced_works":["https://openalex.org/W1164749991","https://openalex.org/W1540725368","https://openalex.org/W1673310716","https://openalex.org/W2002256627","https://openalex.org/W2096145798","https://openalex.org/W2105643086","https://openalex.org/W2120327309","https://openalex.org/W2125510930","https://openalex.org/W2125710232","https://openalex.org/W2138362680","https://openalex.org/W2144274908","https://openalex.org/W2160808139","https://openalex.org/W2270696664","https://openalex.org/W2291986326","https://openalex.org/W2395575420","https://openalex.org/W2576265825","https://openalex.org/W2603266952","https://openalex.org/W2617547828","https://openalex.org/W2736601468","https://openalex.org/W2765672669","https://openalex.org/W2770884134","https://openalex.org/W2780635050","https://openalex.org/W2785556016","https://openalex.org/W2786928559","https://openalex.org/W2797527950","https://openalex.org/W2803308811","https://openalex.org/W2807741983","https://openalex.org/W2897822515","https://openalex.org/W2899685447","https://openalex.org/W2942517366","https://openalex.org/W2950708852","https://openalex.org/W2962938168","https://openalex.org/W2963000099","https://openalex.org/W2963328631","https://openalex.org/W2963423916","https://openalex.org/W2963890729","https://openalex.org/W2963937357","https://openalex.org/W2964164283","https://openalex.org/W2974589008","https://openalex.org/W3093287223","https://openalex.org/W3105048218","https://openalex.org/W4295598622","https://openalex.org/W4297789683","https://openalex.org/W4298876402","https://openalex.org/W6637131181","https://openalex.org/W6678635437","https://openalex.org/W6678900246","https://openalex.org/W6680197816","https://openalex.org/W6681187623","https://openalex.org/W6683300800","https://openalex.org/W6694177581","https://openalex.org/W6696772115","https://openalex.org/W6712181171","https://openalex.org/W6731942744","https://openalex.org/W6741002519","https://openalex.org/W6743367460","https://openalex.org/W6746462176","https://openalex.org/W6748554570","https://openalex.org/W6749304979","https://openalex.org/W6750629867","https://openalex.org/W6751629939","https://openalex.org/W6752380930"],"related_works":["https://openalex.org/W2961085424","https://openalex.org/W4306674287","https://openalex.org/W3046775127","https://openalex.org/W3107602296","https://openalex.org/W3170094116","https://openalex.org/W4386462264","https://openalex.org/W4364306694","https://openalex.org/W4312192474","https://openalex.org/W4283697347","https://openalex.org/W4210805261"],"abstract_inverted_index":{"Non-stationary":[0],"environment":[1,9,14,25],"is":[2,15,40,63,81,116],"general":[3],"in":[4,44,168],"real":[5],"environment,":[6],"including":[7,129],"adversarial":[8],"and":[10,96,133],"multi-agent":[11,127,169],"problem.":[12],"Multi-agent":[13],"a":[16,28,110],"typical":[17],"non-stationary":[18],"environment.":[19,54],"Each":[20],"agent":[21,47,114],"of":[22,53,104],"the":[23,33,41,97,102,105,120,144,148],"shared":[24],"must":[26],"learn":[27],"efficient":[29],"interaction":[30],"for":[31,86],"maximizing":[32],"expected":[34],"reward.":[35],"Independent":[36],"reinforcement":[37,71,79],"learning":[38,67,80,166],"(InRL)":[39],"simplest":[42],"form":[43],"which":[45,62],"each":[46],"treats":[48],"other":[49,164],"agents":[50],"as":[51],"part":[52],"In":[55],"this":[56],"paper,":[57],"we":[58],"present":[59],"Max-Mean-Learning-Win-or-Learn-Fast":[60],"(MML-WoLF),":[61],"an":[64],"independent":[65,165],"on-policy":[66],"algorithm":[68,125],"based":[69,77],"on":[70,78,90],"clustering.":[72,88],"A":[73],"variational":[74],"auto-encoder":[75],"method":[76,158],"proposed":[82],"to":[83,100,109,126,142],"extract":[84],"features":[85],"unsupervised":[87],"Based":[89],"clustering":[91,138],"results,":[92],"MML-WoLF":[93],"uses":[94],"statistics":[95],"dominated":[98],"factor":[99],"calculate":[101],"values":[103],"states":[106],"that":[107,156],"belong":[108],"certain":[111],"category.":[112],"The":[113,137,152],"policy":[115],"iteratively":[117],"updated":[118],"by":[119],"value.":[121],"We":[122],"apply":[123],"our":[124,157],"problems":[128],"matrix-game,":[130],"grid":[131],"world,":[132],"continuous":[134],"world":[135],"game.":[136],"results":[139,154],"are":[140],"able":[141],"show":[143],"strategies":[145],"distribution":[146],"under":[147],"agent's":[149],"current":[150],"policy.":[151],"experiment":[153],"suggest":[155],"significantly":[159],"improves":[160],"average":[161],"performance":[162],"over":[163],"algorithms":[167],"problems.":[170]},"counts_by_year":[{"year":2023,"cited_by_count":1},{"year":2021,"cited_by_count":1},{"year":2020,"cited_by_count":2}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
