{"id":"https://openalex.org/W3173211893","doi":"https://doi.org/10.1109/tnnls.2021.3089493","title":"SMIX(\u03bb): Enhancing Centralized Value Functions for Cooperative Multiagent Reinforcement Learning","display_name":"SMIX(\u03bb): Enhancing Centralized Value Functions for Cooperative Multiagent Reinforcement Learning","publication_year":2021,"publication_date":"2021-06-28","ids":{"openalex":"https://openalex.org/W3173211893","doi":"https://doi.org/10.1109/tnnls.2021.3089493","mag":"3173211893","pmid":"https://pubmed.ncbi.nlm.nih.gov/34181556"},"language":"en","primary_location":{"id":"doi:10.1109/tnnls.2021.3089493","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tnnls.2021.3089493","pdf_url":null,"source":{"id":"https://openalex.org/S4210175523","display_name":"IEEE Transactions on Neural Networks and Learning Systems","issn_l":"2162-237X","issn":["2162-237X","2162-2388"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Neural Networks and Learning Systems","raw_type":"journal-article"},"type":"article","indexed_in":["crossref","pubmed"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5083980038","display_name":"Xinghu Yao","orcid":"https://orcid.org/0000-0002-8323-4114"},"institutions":[{"id":"https://openalex.org/I9842412","display_name":"Nanjing University of Aeronautics and Astronautics","ror":"https://ror.org/01scyh794","country_code":"CN","type":"education","lineage":["https://openalex.org/I9842412"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Xinghu Yao","raw_affiliation_strings":["College of Computer Science and Technology, Nanjing University of Aeronautics and Astronautics, MIIT Key Laboratory of Pattern Analysis and Machine Intelligence, Nanjing, China"],"raw_orcid":"https://orcid.org/0000-0002-8323-4114","affiliations":[{"raw_affiliation_string":"College of Computer Science and Technology, Nanjing University of Aeronautics and Astronautics, MIIT Key Laboratory of Pattern Analysis and Machine Intelligence, Nanjing, China","institution_ids":["https://openalex.org/I9842412"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5056781404","display_name":"Chao Wen","orcid":"https://orcid.org/0000-0002-1676-5177"},"institutions":[{"id":"https://openalex.org/I9842412","display_name":"Nanjing University of Aeronautics and Astronautics","ror":"https://ror.org/01scyh794","country_code":"CN","type":"education","lineage":["https://openalex.org/I9842412"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Chao Wen","raw_affiliation_strings":["College of Computer Science and Technology, Nanjing University of Aeronautics and Astronautics, MIIT Key Laboratory of Pattern Analysis and Machine Intelligence, Nanjing, China"],"raw_orcid":"https://orcid.org/0000-0002-1676-5177","affiliations":[{"raw_affiliation_string":"College of Computer Science and Technology, Nanjing University of Aeronautics and Astronautics, MIIT Key Laboratory of Pattern Analysis and Machine Intelligence, Nanjing, China","institution_ids":["https://openalex.org/I9842412"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100330357","display_name":"Yuhui Wang","orcid":"https://orcid.org/0000-0002-0502-7486"},"institutions":[{"id":"https://openalex.org/I9842412","display_name":"Nanjing University of Aeronautics and Astronautics","ror":"https://ror.org/01scyh794","country_code":"CN","type":"education","lineage":["https://openalex.org/I9842412"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yuhui Wang","raw_affiliation_strings":["College of Computer Science and Technology, Nanjing University of Aeronautics and Astronautics, MIIT Key Laboratory of Pattern Analysis and Machine Intelligence, Nanjing, China"],"raw_orcid":"https://orcid.org/0000-0002-0502-7486","affiliations":[{"raw_affiliation_string":"College of Computer Science and Technology, Nanjing University of Aeronautics and Astronautics, MIIT Key Laboratory of Pattern Analysis and Machine Intelligence, Nanjing, China","institution_ids":["https://openalex.org/I9842412"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5004478562","display_name":"Xiaoyang Tan","orcid":"https://orcid.org/0000-0002-2683-8667"},"institutions":[{"id":"https://openalex.org/I9842412","display_name":"Nanjing University of Aeronautics and Astronautics","ror":"https://ror.org/01scyh794","country_code":"CN","type":"education","lineage":["https://openalex.org/I9842412"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xiaoyang Tan","raw_affiliation_strings":["College of Computer Science and Technology, Nanjing University of Aeronautics and Astronautics, MIIT Key Laboratory of Pattern Analysis and Machine Intelligence, Nanjing, China"],"raw_orcid":"https://orcid.org/0000-0002-2683-8667","affiliations":[{"raw_affiliation_string":"College of Computer Science and Technology, Nanjing University of Aeronautics and Astronautics, MIIT Key Laboratory of Pattern Analysis and Machine Intelligence, Nanjing, China","institution_ids":["https://openalex.org/I9842412"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5083980038"],"corresponding_institution_ids":["https://openalex.org/I9842412"],"apc_list":null,"apc_paid":null,"fwci":2.6597,"has_fulltext":false,"cited_by_count":24,"citation_normalized_percentile":{"value":0.91537843,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":90,"max":100},"biblio":{"volume":"34","issue":"1","first_page":"52","last_page":"63"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12794","display_name":"Adaptive Dynamic Programming Control","score":0.9829999804496765,"subfield":{"id":"https://openalex.org/subfields/1703","display_name":"Computational Theory and Mathematics"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10409","display_name":"Fuel Cells and Related Materials","score":0.9598000049591064,"subfield":{"id":"https://openalex.org/subfields/2208","display_name":"Electrical and Electronic Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.8314155340194702},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6578542590141296},{"id":"https://openalex.org/keywords/curse-of-dimensionality","display_name":"Curse of dimensionality","score":0.6155645847320557},{"id":"https://openalex.org/keywords/bellman-equation","display_name":"Bellman equation","score":0.5767542719841003},{"id":"https://openalex.org/keywords/benchmark","display_name":"Benchmark (surveying)","score":0.561220645904541},{"id":"https://openalex.org/keywords/margin","display_name":"Margin (machine learning)","score":0.5502706170082092},{"id":"https://openalex.org/keywords/mathematical-optimization","display_name":"Mathematical optimization","score":0.4975891411304474},{"id":"https://openalex.org/keywords/convergence","display_name":"Convergence (economics)","score":0.44229868054389954},{"id":"https://openalex.org/keywords/function","display_name":"Function (biology)","score":0.43458643555641174},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.3700713515281677},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.34642666578292847},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.17013967037200928}],"concepts":[{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.8314155340194702},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6578542590141296},{"id":"https://openalex.org/C111030470","wikidata":"https://www.wikidata.org/wiki/Q1430460","display_name":"Curse of dimensionality","level":2,"score":0.6155645847320557},{"id":"https://openalex.org/C14646407","wikidata":"https://www.wikidata.org/wiki/Q1430750","display_name":"Bellman equation","level":2,"score":0.5767542719841003},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.561220645904541},{"id":"https://openalex.org/C774472","wikidata":"https://www.wikidata.org/wiki/Q6760393","display_name":"Margin (machine learning)","level":2,"score":0.5502706170082092},{"id":"https://openalex.org/C126255220","wikidata":"https://www.wikidata.org/wiki/Q141495","display_name":"Mathematical optimization","level":1,"score":0.4975891411304474},{"id":"https://openalex.org/C2777303404","wikidata":"https://www.wikidata.org/wiki/Q759757","display_name":"Convergence (economics)","level":2,"score":0.44229868054389954},{"id":"https://openalex.org/C14036430","wikidata":"https://www.wikidata.org/wiki/Q3736076","display_name":"Function (biology)","level":2,"score":0.43458643555641174},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.3700713515281677},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.34642666578292847},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.17013967037200928},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0},{"id":"https://openalex.org/C50522688","wikidata":"https://www.wikidata.org/wiki/Q189833","display_name":"Economic growth","level":1,"score":0.0},{"id":"https://openalex.org/C13280743","wikidata":"https://www.wikidata.org/wiki/Q131089","display_name":"Geodesy","level":1,"score":0.0},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0},{"id":"https://openalex.org/C205649164","wikidata":"https://www.wikidata.org/wiki/Q1071","display_name":"Geography","level":0,"score":0.0},{"id":"https://openalex.org/C78458016","wikidata":"https://www.wikidata.org/wiki/Q840400","display_name":"Evolutionary biology","level":1,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1109/tnnls.2021.3089493","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tnnls.2021.3089493","pdf_url":null,"source":{"id":"https://openalex.org/S4210175523","display_name":"IEEE Transactions on Neural Networks and Learning Systems","issn_l":"2162-237X","issn":["2162-237X","2162-2388"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Neural Networks and Learning Systems","raw_type":"journal-article"},{"id":"pmid:34181556","is_oa":false,"landing_page_url":"https://pubmed.ncbi.nlm.nih.gov/34181556","pdf_url":null,"source":{"id":"https://openalex.org/S4306525036","display_name":"PubMed","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1299303238","host_organization_name":"National Institutes of Health","host_organization_lineage":["https://openalex.org/I1299303238"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE transactions on neural networks and learning systems","raw_type":null}],"best_oa_location":null,"sustainable_development_goals":[{"display_name":"Partnerships for the goals","id":"https://metadata.un.org/sdg/17","score":0.41999998688697815}],"awards":[{"id":"https://openalex.org/G258691260","display_name":null,"funder_award_id":"XZA20005","funder_id":"https://openalex.org/F4320322438","funder_display_name":"Nanjing University of Aeronautics and Astronautics"},{"id":"https://openalex.org/G5205180948","display_name":null,"funder_award_id":"61976115","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G5684294440","display_name":null,"funder_award_id":"56XZA18009","funder_id":"https://openalex.org/F4320322438","funder_display_name":"Nanjing University of Aeronautics and Astronautics"},{"id":"https://openalex.org/G7529917","display_name":null,"funder_award_id":"61732006","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"},{"id":"https://openalex.org/F4320322438","display_name":"Nanjing University of Aeronautics and Astronautics","ror":"https://ror.org/01scyh794"}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":43,"referenced_works":["https://openalex.org/W41554520","https://openalex.org/W1579184372","https://openalex.org/W1641379095","https://openalex.org/W2026662445","https://openalex.org/W2100752967","https://openalex.org/W2107112577","https://openalex.org/W2120846115","https://openalex.org/W2145339207","https://openalex.org/W2166533447","https://openalex.org/W2201581102","https://openalex.org/W2509444091","https://openalex.org/W2593237273","https://openalex.org/W2595461628","https://openalex.org/W2617547828","https://openalex.org/W2736601468","https://openalex.org/W2747213132","https://openalex.org/W2912496897","https://openalex.org/W2949464762","https://openalex.org/W2954132259","https://openalex.org/W2962766894","https://openalex.org/W2963252619","https://openalex.org/W3036329728","https://openalex.org/W6603025879","https://openalex.org/W6631137000","https://openalex.org/W6633472260","https://openalex.org/W6640212811","https://openalex.org/W6674705169","https://openalex.org/W6678168664","https://openalex.org/W6687681856","https://openalex.org/W6715102896","https://openalex.org/W6734678876","https://openalex.org/W6738796088","https://openalex.org/W6741002519","https://openalex.org/W6749304979","https://openalex.org/W6752380930","https://openalex.org/W6754627483","https://openalex.org/W6757469721","https://openalex.org/W6758563768","https://openalex.org/W6758846586","https://openalex.org/W6762491519","https://openalex.org/W6763484891","https://openalex.org/W6767327128","https://openalex.org/W6864424756"],"related_works":["https://openalex.org/W2378211422","https://openalex.org/W4321353415","https://openalex.org/W2745001401","https://openalex.org/W2130974462","https://openalex.org/W2028665553","https://openalex.org/W2386410636","https://openalex.org/W3038962357","https://openalex.org/W2025663273","https://openalex.org/W3099153698","https://openalex.org/W3149091143"],"abstract_inverted_index":{"Learning":[0],"a":[1,10,92,109,130,187,196],"stable":[2],"and":[3,81,147],"generalizable":[4],"centralized":[5,206],"value":[6],"function":[7,105],"(CVF)":[8],"is":[9,77,143],"crucial":[11],"but":[12,190],"challenging":[13],"task":[14],"in":[15,40,67,165],"multiagent":[16],"reinforcement":[17],"learning":[18],"(MARL),":[19],"as":[20,91,114,195],"it":[21,124],"has":[22],"to":[23,57,86,94,117,145,199],"deal":[24],"with":[25,35,125,208],"the":[26,29,36,62,88,96,115,126,138,158,169,201],"issue":[27],"that":[28,52,137,176],"joint":[30],"action":[31],"space":[32],"increases":[33],"exponentially":[34],"number":[37],"of":[38,161,204],"agents":[39],"such":[41,74],"scenarios.":[42],"This":[43],"article":[44],"proposes":[45],"an":[46,54],"approach,":[47],"named":[48],"SMIX(":[49,140],"\u03bb":[50,89,141],"),":[51],"uses":[53],"OFF-policy":[55,75],"training":[56,76,207],"achieve":[58],"this":[59,102],"by":[60,186,213],"avoiding":[61],"greedy":[63],"assumption":[64],"commonly":[65],"made":[66],"CVF":[68],"learning.":[69],"As":[70],"importance":[71],"sampling":[72],"for":[73],"both":[78],"computationally":[79],"costly":[80],"numerically":[82],"unstable,":[83],"we":[84,107,135],"proposed":[85,139],"use":[87],"-return":[90],"proxy":[93],"compute":[95],"temporal":[97],"difference":[98],"(TD)":[99],"error.":[100],"With":[101],"new":[103],"loss":[104],"objective,":[106],"adopt":[108],"modified":[110],"QMIX":[111],"network":[112],"structure":[113],"base":[116],"train":[118],"our":[119,177],"model.":[120],"By":[121],"further":[122],"connecting":[123],"Q(\u03bb)":[127,146],"approach":[128,178],"from":[129,157],"unified":[131],"expectation":[132],"correction":[133],"viewpoint,":[134],"show":[136],")":[142],"equivalent":[144],"hence":[148],"shares":[149],"its":[150],"convergence":[151],"properties,":[152],"while":[153],"without":[154],"being":[155],"suffered":[156],"aforementioned":[159],"curse":[160],"dimensionality":[162],"problem":[163],"inherent":[164],"MARL.":[166],"Experiments":[167],"on":[168],"StarCraft":[170],"Multiagent":[171],"Challenge":[172],"(SMAC)":[173],"benchmark":[174],"demonstrate":[175],"not":[179],"only":[180],"outperforms":[181],"several":[182],"state-of-the-art":[183],"MARL":[184],"methods":[185],"large":[188],"margin":[189],"also":[191],"can":[192],"be":[193],"used":[194],"general":[197],"tool":[198],"improve":[200],"overall":[202],"performance":[203],"other":[205],"decentralized":[209],"execution":[210],"(CTDE)-type":[211],"algorithms":[212],"enhancing":[214],"their":[215],"CVFs.":[216]},"counts_by_year":[{"year":2025,"cited_by_count":5},{"year":2024,"cited_by_count":15},{"year":2023,"cited_by_count":1},{"year":2022,"cited_by_count":3}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
