{"id":"https://openalex.org/W3199614200","doi":"https://doi.org/10.1109/ijcnn52387.2021.9533636","title":"MMD-MIX: Value Function Factorisation with Maximum Mean Discrepancy for Cooperative Multi-Agent Reinforcement Learning","display_name":"MMD-MIX: Value Function Factorisation with Maximum Mean Discrepancy for Cooperative Multi-Agent Reinforcement Learning","publication_year":2021,"publication_date":"2021-07-18","ids":{"openalex":"https://openalex.org/W3199614200","doi":"https://doi.org/10.1109/ijcnn52387.2021.9533636","mag":"3199614200"},"language":"en","primary_location":{"id":"doi:10.1109/ijcnn52387.2021.9533636","is_oa":false,"landing_page_url":"https://doi.org/10.1109/ijcnn52387.2021.9533636","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2021 International Joint Conference on Neural Networks (IJCNN)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5100628168","display_name":"Zhiwei Xu","orcid":"https://orcid.org/0000-0002-0754-5295"},"institutions":[{"id":"https://openalex.org/I4210112150","display_name":"Institute of Automation","ror":"https://ror.org/022c3hy66","country_code":"CN","type":"facility","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210112150"]},{"id":"https://openalex.org/I4210100255","display_name":"Beijing Academy of Artificial Intelligence","ror":"https://ror.org/016a74861","country_code":"CN","type":"other","lineage":["https://openalex.org/I4210100255"]},{"id":"https://openalex.org/I4210165038","display_name":"University of Chinese Academy of Sciences","ror":"https://ror.org/05qbk4x57","country_code":"CN","type":"education","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210165038"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Zhiwei Xu","raw_affiliation_strings":["Fusion Innovation Center, Institute of Automation, Chinese Academy of Sciences School of Artificial Intelligence, University of Chinese Academy of Sciences,Beijing,China","Fusion Innovation Center, Institute of Automation, Chinese Academy of Sciences School of Artificial Intelligence, University of Chinese Academy of Sciences, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Fusion Innovation Center, Institute of Automation, Chinese Academy of Sciences School of Artificial Intelligence, University of Chinese Academy of Sciences,Beijing,China","institution_ids":["https://openalex.org/I4210112150","https://openalex.org/I4210100255"]},{"raw_affiliation_string":"Fusion Innovation Center, Institute of Automation, Chinese Academy of Sciences School of Artificial Intelligence, University of Chinese Academy of Sciences, Beijing, China","institution_ids":["https://openalex.org/I4210112150","https://openalex.org/I4210165038"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100419593","display_name":"Dapeng Li","orcid":"https://orcid.org/0000-0002-4735-7651"},"institutions":[{"id":"https://openalex.org/I4210165038","display_name":"University of Chinese Academy of Sciences","ror":"https://ror.org/05qbk4x57","country_code":"CN","type":"education","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210165038"]},{"id":"https://openalex.org/I4210112150","display_name":"Institute of Automation","ror":"https://ror.org/022c3hy66","country_code":"CN","type":"facility","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210112150"]},{"id":"https://openalex.org/I4210100255","display_name":"Beijing Academy of Artificial Intelligence","ror":"https://ror.org/016a74861","country_code":"CN","type":"other","lineage":["https://openalex.org/I4210100255"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Dapeng Li","raw_affiliation_strings":["Fusion Innovation Center, Institute of Automation, Chinese Academy of Sciences School of Artificial Intelligence, University of Chinese Academy of Sciences,Beijing,China","Fusion Innovation Center, Institute of Automation, Chinese Academy of Sciences School of Artificial Intelligence, University of Chinese Academy of Sciences, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Fusion Innovation Center, Institute of Automation, Chinese Academy of Sciences School of Artificial Intelligence, University of Chinese Academy of Sciences,Beijing,China","institution_ids":["https://openalex.org/I4210112150","https://openalex.org/I4210100255"]},{"raw_affiliation_string":"Fusion Innovation Center, Institute of Automation, Chinese Academy of Sciences School of Artificial Intelligence, University of Chinese Academy of Sciences, Beijing, China","institution_ids":["https://openalex.org/I4210112150","https://openalex.org/I4210165038"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5064488664","display_name":"Yunpeng Bai","orcid":"https://orcid.org/0000-0001-5670-7230"},"institutions":[{"id":"https://openalex.org/I4210100255","display_name":"Beijing Academy of Artificial Intelligence","ror":"https://ror.org/016a74861","country_code":"CN","type":"other","lineage":["https://openalex.org/I4210100255"]},{"id":"https://openalex.org/I4210112150","display_name":"Institute of Automation","ror":"https://ror.org/022c3hy66","country_code":"CN","type":"facility","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210112150"]},{"id":"https://openalex.org/I4210165038","display_name":"University of Chinese Academy of Sciences","ror":"https://ror.org/05qbk4x57","country_code":"CN","type":"education","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210165038"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yunpeng Bai","raw_affiliation_strings":["Fusion Innovation Center, Institute of Automation, Chinese Academy of Sciences School of Artificial Intelligence, University of Chinese Academy of Sciences,Beijing,China","Fusion Innovation Center, Institute of Automation, Chinese Academy of Sciences School of Artificial Intelligence, University of Chinese Academy of Sciences, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Fusion Innovation Center, Institute of Automation, Chinese Academy of Sciences School of Artificial Intelligence, University of Chinese Academy of Sciences,Beijing,China","institution_ids":["https://openalex.org/I4210112150","https://openalex.org/I4210100255"]},{"raw_affiliation_string":"Fusion Innovation Center, Institute of Automation, Chinese Academy of Sciences School of Artificial Intelligence, University of Chinese Academy of Sciences, Beijing, China","institution_ids":["https://openalex.org/I4210112150","https://openalex.org/I4210165038"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5101572310","display_name":"Guoliang Fan","orcid":"https://orcid.org/0000-0003-2724-2432"},"institutions":[{"id":"https://openalex.org/I4210165038","display_name":"University of Chinese Academy of Sciences","ror":"https://ror.org/05qbk4x57","country_code":"CN","type":"education","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210165038"]},{"id":"https://openalex.org/I4210112150","display_name":"Institute of Automation","ror":"https://ror.org/022c3hy66","country_code":"CN","type":"facility","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210112150"]},{"id":"https://openalex.org/I4210100255","display_name":"Beijing Academy of Artificial Intelligence","ror":"https://ror.org/016a74861","country_code":"CN","type":"other","lineage":["https://openalex.org/I4210100255"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Guoliang Fan","raw_affiliation_strings":["Fusion Innovation Center, Institute of Automation, Chinese Academy of Sciences School of Artificial Intelligence, University of Chinese Academy of Sciences,Beijing,China","Fusion Innovation Center, Institute of Automation, Chinese Academy of Sciences School of Artificial Intelligence, University of Chinese Academy of Sciences, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Fusion Innovation Center, Institute of Automation, Chinese Academy of Sciences School of Artificial Intelligence, University of Chinese Academy of Sciences,Beijing,China","institution_ids":["https://openalex.org/I4210112150","https://openalex.org/I4210100255"]},{"raw_affiliation_string":"Fusion Innovation Center, Institute of Automation, Chinese Academy of Sciences School of Artificial Intelligence, University of Chinese Academy of Sciences, Beijing, China","institution_ids":["https://openalex.org/I4210112150","https://openalex.org/I4210165038"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5100628168"],"corresponding_institution_ids":["https://openalex.org/I4210100255","https://openalex.org/I4210112150","https://openalex.org/I4210165038"],"apc_list":null,"apc_paid":null,"fwci":1.0877,"has_fulltext":false,"cited_by_count":9,"citation_normalized_percentile":{"value":0.81971878,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":97},"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"7"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9983999729156494,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9983999729156494,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11099","display_name":"Autonomous Vehicle Technology and Safety","score":0.9401999711990356,"subfield":{"id":"https://openalex.org/subfields/2203","display_name":"Automotive Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11975","display_name":"Evolutionary Algorithms and Applications","score":0.930400013923645,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.7788615822792053},{"id":"https://openalex.org/keywords/randomness","display_name":"Randomness","score":0.7012311816215515},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6087948679924011},{"id":"https://openalex.org/keywords/decomposition","display_name":"Decomposition","score":0.5480329394340515},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.532965898513794},{"id":"https://openalex.org/keywords/value","display_name":"Value (mathematics)","score":0.5102636814117432},{"id":"https://openalex.org/keywords/class","display_name":"Class (philosophy)","score":0.4782354235649109},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.3231552243232727},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.23591041564941406},{"id":"https://openalex.org/keywords/statistics","display_name":"Statistics","score":0.15407618880271912}],"concepts":[{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.7788615822792053},{"id":"https://openalex.org/C125112378","wikidata":"https://www.wikidata.org/wiki/Q176640","display_name":"Randomness","level":2,"score":0.7012311816215515},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6087948679924011},{"id":"https://openalex.org/C124681953","wikidata":"https://www.wikidata.org/wiki/Q339062","display_name":"Decomposition","level":2,"score":0.5480329394340515},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.532965898513794},{"id":"https://openalex.org/C2776291640","wikidata":"https://www.wikidata.org/wiki/Q2912517","display_name":"Value (mathematics)","level":2,"score":0.5102636814117432},{"id":"https://openalex.org/C2777212361","wikidata":"https://www.wikidata.org/wiki/Q5127848","display_name":"Class (philosophy)","level":2,"score":0.4782354235649109},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.3231552243232727},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.23591041564941406},{"id":"https://openalex.org/C105795698","wikidata":"https://www.wikidata.org/wiki/Q12483","display_name":"Statistics","level":1,"score":0.15407618880271912},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0},{"id":"https://openalex.org/C18903297","wikidata":"https://www.wikidata.org/wiki/Q7150","display_name":"Ecology","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/ijcnn52387.2021.9533636","is_oa":false,"landing_page_url":"https://doi.org/10.1109/ijcnn52387.2021.9533636","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2021 International Joint Conference on Neural Networks (IJCNN)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":69,"referenced_works":["https://openalex.org/W1487641199","https://openalex.org/W1757796397","https://openalex.org/W1959608418","https://openalex.org/W2099471712","https://openalex.org/W2121863487","https://openalex.org/W2125865219","https://openalex.org/W2212660284","https://openalex.org/W2395575420","https://openalex.org/W2617322972","https://openalex.org/W2617547828","https://openalex.org/W2626637010","https://openalex.org/W2747213132","https://openalex.org/W2756196406","https://openalex.org/W2765302304","https://openalex.org/W2803155336","https://openalex.org/W2803308811","https://openalex.org/W2894976951","https://openalex.org/W2907606902","https://openalex.org/W2946606218","https://openalex.org/W2949963774","https://openalex.org/W2950292946","https://openalex.org/W2951984055","https://openalex.org/W2962892300","https://openalex.org/W2962917939","https://openalex.org/W2962938168","https://openalex.org/W2962966033","https://openalex.org/W2963000099","https://openalex.org/W2963407617","https://openalex.org/W2963423916","https://openalex.org/W2963717208","https://openalex.org/W2963757175","https://openalex.org/W2964338167","https://openalex.org/W2970036354","https://openalex.org/W2981030070","https://openalex.org/W2983617607","https://openalex.org/W3009584650","https://openalex.org/W3034607397","https://openalex.org/W3045080532","https://openalex.org/W3093287223","https://openalex.org/W4288594419","https://openalex.org/W4288601262","https://openalex.org/W4295598622","https://openalex.org/W4298857966","https://openalex.org/W4298876402","https://openalex.org/W4299802797","https://openalex.org/W4320013936","https://openalex.org/W6629354409","https://openalex.org/W6637967152","https://openalex.org/W6640963894","https://openalex.org/W6678814708","https://openalex.org/W6683300800","https://openalex.org/W6688325169","https://openalex.org/W6712181171","https://openalex.org/W6713411898","https://openalex.org/W6737965738","https://openalex.org/W6738796088","https://openalex.org/W6739516088","https://openalex.org/W6744537943","https://openalex.org/W6749304979","https://openalex.org/W6751139674","https://openalex.org/W6751629939","https://openalex.org/W6755069753","https://openalex.org/W6757781149","https://openalex.org/W6758846586","https://openalex.org/W6762491519","https://openalex.org/W6767112054","https://openalex.org/W6774583691","https://openalex.org/W6781702133","https://openalex.org/W6864424756"],"related_works":["https://openalex.org/W3034924094","https://openalex.org/W3094954546","https://openalex.org/W1488708774","https://openalex.org/W1982811510","https://openalex.org/W4391100477","https://openalex.org/W2402189625","https://openalex.org/W4327779705","https://openalex.org/W4310560702","https://openalex.org/W1513698804","https://openalex.org/W2029712093"],"abstract_inverted_index":{"In":[0],"the":[1,15,49,79,82,99,127,138],"real":[2],"world,":[3],"many":[4,24],"tasks":[5],"require":[6],"multiple":[7],"agents":[8],"to":[9,64,97,103,122],"cooperate":[10],"with":[11,33],"each":[12],"other":[13],"under":[14],"condition":[16],"of":[17,42],"local":[18],"observations.":[19],"To":[20],"solve":[21],"such":[22],"problems,":[23],"multi-agent":[25],"reinforcement":[26,92],"learning":[27,93],"methods":[28],"based":[29],"on":[30],"Centralized":[31],"Training":[32],"Decentralized":[34],"Execution":[35],"have":[36],"been":[37],"proposed.":[38],"One":[39],"representative":[40],"class":[41],"work":[43],"is":[44,117],"value":[45,95],"decomposition,":[46],"which":[47,116],"decomposes":[48],"global":[50],"joint":[51],"Q-value":[52],"Q":[53,60],"<inf":[54,61],"xmlns:mml=\"http://www.w3.org/1998/Math/MathML\"":[55,62],"xmlns:xlink=\"http://www.w3.org/1999/xlink\">jt</inf>":[56],"into":[57,126],"individual":[58],"Q-values":[59],"xmlns:xlink=\"http://www.w3.org/1999/xlink\">a</inf>":[63],"guide":[65],"individuals'":[66],"behaviors,":[67],"e.g.":[68],"VDN":[69],"(Value-Decomposition":[70],"Networks)":[71],"and":[72,94],"QMIX.":[73],"However,":[74],"these":[75],"baselines":[76,136],"often":[77],"ignore":[78],"randomness":[80,125],"in":[81,137],"situation.":[83],"We":[84],"propose":[85],"MMD-MIX,":[86],"a":[87,118],"method":[88],"that":[89,132],"combines":[90],"distributional":[91],"decomposition":[96],"alleviate":[98],"above":[100],"weaknesses.":[101],"Besides,":[102],"improve":[104],"data":[105],"sampling":[106],"efficiency,":[107],"we":[108],"were":[109],"inspired":[110],"by":[111],"REM":[112],"(Random":[113],"Ensemble":[114],"Mixture)":[115],"robust":[119],"RL":[120],"algorithm":[121],"explicitly":[123],"introduce":[124],"MMD-MIX.":[128],"The":[129],"experiments":[130],"demonstrate":[131],"MMD-MIX":[133],"outperforms":[134],"prior":[135],"StarCraft":[139],"Multi-Agent":[140],"Challenge":[141],"(SMAC)":[142],"environment.":[143]},"counts_by_year":[{"year":2025,"cited_by_count":1},{"year":2024,"cited_by_count":3},{"year":2023,"cited_by_count":1},{"year":2022,"cited_by_count":4}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
