{"id":"https://openalex.org/W2338351427","doi":"https://doi.org/10.1109/tcyb.2016.2544866","title":"FMRQ\u2014A Multiagent Reinforcement Learning Algorithm for Fully Cooperative Tasks","display_name":"FMRQ\u2014A Multiagent Reinforcement Learning Algorithm for Fully Cooperative Tasks","publication_year":2016,"publication_date":"2016-04-14","ids":{"openalex":"https://openalex.org/W2338351427","doi":"https://doi.org/10.1109/tcyb.2016.2544866","mag":"2338351427","pmid":"https://pubmed.ncbi.nlm.nih.gov/27101627"},"language":"en","primary_location":{"id":"doi:10.1109/tcyb.2016.2544866","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tcyb.2016.2544866","pdf_url":null,"source":{"id":"https://openalex.org/S4210191041","display_name":"IEEE Transactions on Cybernetics","issn_l":"2168-2267","issn":["2168-2267","2168-2275"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Cybernetics","raw_type":"journal-article"},"type":"article","indexed_in":["crossref","pubmed"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5100390176","display_name":"Zhen Zhang","orcid":"https://orcid.org/0000-0002-6615-629X"},"institutions":[{"id":"https://openalex.org/I108688024","display_name":"Qingdao University","ror":"https://ror.org/021cj6z65","country_code":"CN","type":"education","lineage":["https://openalex.org/I108688024"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Zhen Zhang","raw_affiliation_strings":["College of Automation Engineering, Qingdao University, Qingdao, China"],"affiliations":[{"raw_affiliation_string":"College of Automation Engineering, Qingdao University, Qingdao, China","institution_ids":["https://openalex.org/I108688024"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100624298","display_name":"Dongbin Zhao","orcid":"https://orcid.org/0000-0001-8218-9633"},"institutions":[{"id":"https://openalex.org/I4210094879","display_name":"Shandong Institute of Automation","ror":"https://ror.org/00qdtba35","country_code":"CN","type":"facility","lineage":["https://openalex.org/I4210094879","https://openalex.org/I4210142748"]},{"id":"https://openalex.org/I19820366","display_name":"Chinese Academy of Sciences","ror":"https://ror.org/034t30j35","country_code":"CN","type":"funder","lineage":["https://openalex.org/I19820366"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Dongbin Zhao","raw_affiliation_strings":["State Key Laboratory of Management and Control for Complex Systems, Institute of Automation, Chinese Academy of Sciences, Beijing, China"],"affiliations":[{"raw_affiliation_string":"State Key Laboratory of Management and Control for Complex Systems, Institute of Automation, Chinese Academy of Sciences, Beijing, China","institution_ids":["https://openalex.org/I4210094879","https://openalex.org/I19820366"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101566378","display_name":"Junwei Gao","orcid":"https://orcid.org/0000-0001-8870-2960"},"institutions":[{"id":"https://openalex.org/I108688024","display_name":"Qingdao University","ror":"https://ror.org/021cj6z65","country_code":"CN","type":"education","lineage":["https://openalex.org/I108688024"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Junwei Gao","raw_affiliation_strings":["College of Automation Engineering, Qingdao University, Qingdao, China"],"affiliations":[{"raw_affiliation_string":"College of Automation Engineering, Qingdao University, Qingdao, China","institution_ids":["https://openalex.org/I108688024"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100670772","display_name":"Dongqing Wang","orcid":"https://orcid.org/0000-0001-8856-4289"},"institutions":[{"id":"https://openalex.org/I108688024","display_name":"Qingdao University","ror":"https://ror.org/021cj6z65","country_code":"CN","type":"education","lineage":["https://openalex.org/I108688024"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Dongqing Wang","raw_affiliation_strings":["College of Automation Engineering, Qingdao University, Qingdao, China"],"affiliations":[{"raw_affiliation_string":"College of Automation Engineering, Qingdao University, Qingdao, China","institution_ids":["https://openalex.org/I108688024"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5045525711","display_name":"Yujie Dai","orcid":"https://orcid.org/0009-0000-6368-5033"},"institutions":[{"id":"https://openalex.org/I4210141966","display_name":"China Academy of Railway Sciences","ror":"https://ror.org/051wv2j09","country_code":"CN","type":"facility","lineage":["https://openalex.org/I4210141966"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yujie Dai","raw_affiliation_strings":["China Academy of Railway Sciences, Transportation and Economics Institute, Beijing, China"],"affiliations":[{"raw_affiliation_string":"China Academy of Railway Sciences, Transportation and Economics Institute, Beijing, China","institution_ids":["https://openalex.org/I4210141966"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5100390176"],"corresponding_institution_ids":["https://openalex.org/I108688024"],"apc_list":null,"apc_paid":null,"fwci":11.9974,"has_fulltext":false,"cited_by_count":72,"citation_normalized_percentile":{"value":0.98500179,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":91,"max":100},"biblio":{"volume":"47","issue":"6","first_page":"1367","last_page":"1379"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9991999864578247,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9991999864578247,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12794","display_name":"Adaptive Dynamic Programming Control","score":0.9775000214576721,"subfield":{"id":"https://openalex.org/subfields/1703","display_name":"Computational Theory and Mathematics"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10249","display_name":"Distributed Control Multi-Agent Systems","score":0.9315000176429749,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.9026905298233032},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7521185874938965},{"id":"https://openalex.org/keywords/nash-equilibrium","display_name":"Nash equilibrium","score":0.6044483780860901},{"id":"https://openalex.org/keywords/action","display_name":"Action (physics)","score":0.5681362152099609},{"id":"https://openalex.org/keywords/reinforcement","display_name":"Reinforcement","score":0.5290863513946533},{"id":"https://openalex.org/keywords/state","display_name":"State (computer science)","score":0.5231674313545227},{"id":"https://openalex.org/keywords/multi-agent-system","display_name":"Multi-agent system","score":0.4355934262275696},{"id":"https://openalex.org/keywords/signal","display_name":"SIGNAL (programming language)","score":0.4208362102508545},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.4068751037120819},{"id":"https://openalex.org/keywords/mathematical-optimization","display_name":"Mathematical optimization","score":0.3199493885040283},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.3147669732570648},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.1567678451538086},{"id":"https://openalex.org/keywords/engineering","display_name":"Engineering","score":0.06083795428276062}],"concepts":[{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.9026905298233032},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7521185874938965},{"id":"https://openalex.org/C46814582","wikidata":"https://www.wikidata.org/wiki/Q23389","display_name":"Nash equilibrium","level":2,"score":0.6044483780860901},{"id":"https://openalex.org/C2780791683","wikidata":"https://www.wikidata.org/wiki/Q846785","display_name":"Action (physics)","level":2,"score":0.5681362152099609},{"id":"https://openalex.org/C67203356","wikidata":"https://www.wikidata.org/wiki/Q1321905","display_name":"Reinforcement","level":2,"score":0.5290863513946533},{"id":"https://openalex.org/C48103436","wikidata":"https://www.wikidata.org/wiki/Q599031","display_name":"State (computer science)","level":2,"score":0.5231674313545227},{"id":"https://openalex.org/C41550386","wikidata":"https://www.wikidata.org/wiki/Q529909","display_name":"Multi-agent system","level":2,"score":0.4355934262275696},{"id":"https://openalex.org/C2779843651","wikidata":"https://www.wikidata.org/wiki/Q7390335","display_name":"SIGNAL (programming language)","level":2,"score":0.4208362102508545},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4068751037120819},{"id":"https://openalex.org/C126255220","wikidata":"https://www.wikidata.org/wiki/Q141495","display_name":"Mathematical optimization","level":1,"score":0.3199493885040283},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.3147669732570648},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.1567678451538086},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.06083795428276062},{"id":"https://openalex.org/C66938386","wikidata":"https://www.wikidata.org/wiki/Q633538","display_name":"Structural engineering","level":1,"score":0.0},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1109/tcyb.2016.2544866","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tcyb.2016.2544866","pdf_url":null,"source":{"id":"https://openalex.org/S4210191041","display_name":"IEEE Transactions on Cybernetics","issn_l":"2168-2267","issn":["2168-2267","2168-2275"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Cybernetics","raw_type":"journal-article"},{"id":"pmid:27101627","is_oa":false,"landing_page_url":"https://pubmed.ncbi.nlm.nih.gov/27101627","pdf_url":null,"source":{"id":"https://openalex.org/S4306525036","display_name":"PubMed","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1299303238","host_organization_name":"National Institutes of Health","host_organization_lineage":["https://openalex.org/I1299303238"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE transactions on cybernetics","raw_type":null}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G1731450854","display_name":null,"funder_award_id":"61273136","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G209715934","display_name":null,"funder_award_id":"61573205","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G5745859652","display_name":null,"funder_award_id":"61573353","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G7163010157","display_name":null,"funder_award_id":"61533017","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":50,"referenced_works":["https://openalex.org/W1491317138","https://openalex.org/W1514633502","https://openalex.org/W1542941925","https://openalex.org/W1552825266","https://openalex.org/W1607392272","https://openalex.org/W1664884371","https://openalex.org/W1963530623","https://openalex.org/W1972994260","https://openalex.org/W1974015235","https://openalex.org/W1974740629","https://openalex.org/W1976473913","https://openalex.org/W1978375026","https://openalex.org/W1980497227","https://openalex.org/W1999035253","https://openalex.org/W2000029621","https://openalex.org/W2029250042","https://openalex.org/W2032126548","https://openalex.org/W2032378315","https://openalex.org/W2033731884","https://openalex.org/W2045817941","https://openalex.org/W2049040026","https://openalex.org/W2051586379","https://openalex.org/W2070310653","https://openalex.org/W2070963703","https://openalex.org/W2077671246","https://openalex.org/W2083535091","https://openalex.org/W2099618002","https://openalex.org/W2104602264","https://openalex.org/W2105416261","https://openalex.org/W2108892923","https://openalex.org/W2110906765","https://openalex.org/W2120327309","https://openalex.org/W2136934807","https://openalex.org/W2141837602","https://openalex.org/W2145143778","https://openalex.org/W2159571856","https://openalex.org/W2345224123","https://openalex.org/W2507578125","https://openalex.org/W3139377883","https://openalex.org/W4255047891","https://openalex.org/W6630755865","https://openalex.org/W6636288269","https://openalex.org/W6637180814","https://openalex.org/W6645570421","https://openalex.org/W6662570204","https://openalex.org/W6675811377","https://openalex.org/W6676769901","https://openalex.org/W6680209877","https://openalex.org/W6704755794","https://openalex.org/W6792155000"],"related_works":["https://openalex.org/W4310083477","https://openalex.org/W2328553770","https://openalex.org/W2920061524","https://openalex.org/W1977959518","https://openalex.org/W2038908348","https://openalex.org/W2107890255","https://openalex.org/W2106552856","https://openalex.org/W2145821588","https://openalex.org/W2086122291","https://openalex.org/W1987513656"],"abstract_inverted_index":{"In":[0],"this":[1],"paper,":[2],"we":[3],"propose":[4],"a":[5],"multiagent":[6,44],"reinforcement":[7,63],"learning":[8],"algorithm":[9,16,157],"dealing":[10],"with":[11,131,160],"fully":[12],"cooperative":[13],"tasks.":[14],"The":[15,46],"is":[17,59,110,140,146],"called":[18],"frequency":[19,47],"of":[20,31,48,56,74,95,100,106,118],"the":[21,32,40,50,62,72,75,119,143,155],"maximum":[22],"reward":[23,54,58,85],"Q-learning":[24],"(FMRQ).":[25],"FMRQ":[26,66,91,113],"aims":[27],"to":[28,38,116],"achieve":[29],"one":[30,104,117,145],"optimal":[33,120],"Nash":[34,121],"equilibria":[35,122],"so":[36],"as":[37,61],"optimize":[39],"performance":[41],"index":[42],"in":[43,123],"systems.":[45],"obtaining":[49],"highest":[51],"global":[52],"immediate":[53,57],"instead":[55],"used":[60],"signal.":[64],"With":[65],"each":[67,87],"agent":[68],"does":[69],"not":[70],"need":[71],"observation":[73],"other":[76,144],"agents'":[77],"actions":[78],"and":[79,84,103,134,142],"only":[80],"shares":[81],"its":[82],"state":[83],"at":[86],"step.":[88],"We":[89],"validate":[90],"through":[92],"case":[93,105],"studies":[94],"repeated":[96],"games:":[97],"four":[98],"cases":[99],"two-player":[101],"two-action":[102],"three-player":[107],"two-action.":[108],"It":[109],"demonstrated":[111],"that":[112,154],"can":[114],"converge":[115],"these":[124],"cases.":[125],"Moreover,":[126],"comparison":[127],"experiments":[128],"on":[129],"tasks":[130],"multiple":[132],"states":[133],"finite":[135],"steps":[136],"are":[137],"conducted.":[138],"One":[139],"box-pushing":[141],"distributed":[147],"sensor":[148],"network":[149],"problem.":[150],"Experimental":[151],"results":[152],"show":[153],"proposed":[156],"outperforms":[158],"others":[159],"higher":[161],"performance.":[162]},"counts_by_year":[{"year":2025,"cited_by_count":1},{"year":2024,"cited_by_count":5},{"year":2023,"cited_by_count":9},{"year":2022,"cited_by_count":5},{"year":2021,"cited_by_count":9},{"year":2020,"cited_by_count":15},{"year":2019,"cited_by_count":15},{"year":2018,"cited_by_count":10},{"year":2017,"cited_by_count":3}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
