{"id":"https://openalex.org/W3199614200","doi":"https://doi.org/10.1109/ijcnn52387.2021.9533636","title":"MMD-MIX: Value Function Factorisation with Maximum Mean Discrepancy for Cooperative Multi-Agent Reinforcement Learning","display_name":"MMD-MIX: Value Function Factorisation with Maximum Mean Discrepancy for Cooperative Multi-Agent Reinforcement Learning","publication_year":2021,"publication_date":"2021-07-18","ids":{"openalex":"https://openalex.org/W3199614200","doi":"https://doi.org/10.1109/ijcnn52387.2021.9533636","mag":"3199614200"},"language":"en","primary_location":{"id":"doi:10.1109/ijcnn52387.2021.9533636","is_oa":false,"landing_page_url":"https://doi.org/10.1109/ijcnn52387.2021.9533636","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2021 International Joint Conference on Neural Networks (IJCNN)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5100628168","display_name":"Zhiwei Xu","orcid":"https://orcid.org/0000-0002-0754-5295"},"institutions":[{"id":"https://openalex.org/I4210100255","display_name":"Beijing Academy of Artificial Intelligence","ror":"https://ror.org/016a74861","country_code":"CN","type":"other","lineage":["https://openalex.org/I4210100255"]},{"id":"https://openalex.org/I4210112150","display_name":"Institute of Automation","ror":"https://ror.org/022c3hy66","country_code":"CN","type":"facility","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210112150"]},{"id":"https://openalex.org/I4210165038","display_name":"University of Chinese Academy of Sciences","ror":"https://ror.org/05qbk4x57","country_code":"CN","type":"education","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210165038"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Zhiwei Xu","raw_affiliation_strings":["Fusion Innovation Center, Institute of Automation, Chinese Academy of Sciences School of Artificial Intelligence, University of Chinese Academy of Sciences,Beijing,China","Fusion Innovation Center, Institute of Automation, Chinese Academy of Sciences School of Artificial Intelligence, University of Chinese Academy of Sciences, Beijing, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Fusion Innovation Center, Institute of Automation, Chinese Academy of Sciences School of Artificial Intelligence, University of Chinese Academy of Sciences,Beijing,China","institution_ids":["https://openalex.org/I4210112150","https://openalex.org/I4210100255"]},{"raw_affiliation_string":"Fusion Innovation Center, Institute of Automation, Chinese Academy of Sciences School of Artificial Intelligence, University of Chinese Academy of Sciences, Beijing, China","institution_ids":["https://openalex.org/I4210112150","https://openalex.org/I4210165038"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100419593","display_name":"Dapeng Li","orcid":"https://orcid.org/0000-0002-4735-7651"},"institutions":[{"id":"https://openalex.org/I4210100255","display_name":"Beijing Academy of Artificial Intelligence","ror":"https://ror.org/016a74861","country_code":"CN","type":"other","lineage":["https://openalex.org/I4210100255"]},{"id":"https://openalex.org/I4210112150","display_name":"Institute of Automation","ror":"https://ror.org/022c3hy66","country_code":"CN","type":"facility","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210112150"]},{"id":"https://openalex.org/I4210165038","display_name":"University of Chinese Academy of Sciences","ror":"https://ror.org/05qbk4x57","country_code":"CN","type":"education","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210165038"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Dapeng Li","raw_affiliation_strings":["Fusion Innovation Center, Institute of Automation, Chinese Academy of Sciences School of Artificial Intelligence, University of Chinese Academy of Sciences,Beijing,China","Fusion Innovation Center, Institute of Automation, Chinese Academy of Sciences School of Artificial Intelligence, University of Chinese Academy of Sciences, Beijing, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Fusion Innovation Center, Institute of Automation, Chinese Academy of Sciences School of Artificial Intelligence, University of Chinese Academy of Sciences,Beijing,China","institution_ids":["https://openalex.org/I4210112150","https://openalex.org/I4210100255"]},{"raw_affiliation_string":"Fusion Innovation Center, Institute of Automation, Chinese Academy of Sciences School of Artificial Intelligence, University of Chinese Academy of Sciences, Beijing, China","institution_ids":["https://openalex.org/I4210112150","https://openalex.org/I4210165038"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5064488664","display_name":"Yunpeng Bai","orcid":"https://orcid.org/0000-0001-5670-7230"},"institutions":[{"id":"https://openalex.org/I4210100255","display_name":"Beijing Academy of Artificial Intelligence","ror":"https://ror.org/016a74861","country_code":"CN","type":"other","lineage":["https://openalex.org/I4210100255"]},{"id":"https://openalex.org/I4210112150","display_name":"Institute of Automation","ror":"https://ror.org/022c3hy66","country_code":"CN","type":"facility","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210112150"]},{"id":"https://openalex.org/I4210165038","display_name":"University of Chinese Academy of Sciences","ror":"https://ror.org/05qbk4x57","country_code":"CN","type":"education","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210165038"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yunpeng Bai","raw_affiliation_strings":["Fusion Innovation Center, Institute of Automation, Chinese Academy of Sciences School of Artificial Intelligence, University of Chinese Academy of Sciences,Beijing,China","Fusion Innovation Center, Institute of Automation, Chinese Academy of Sciences School of Artificial Intelligence, University of Chinese Academy of Sciences, Beijing, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Fusion Innovation Center, Institute of Automation, Chinese Academy of Sciences School of Artificial Intelligence, University of Chinese Academy of Sciences,Beijing,China","institution_ids":["https://openalex.org/I4210112150","https://openalex.org/I4210100255"]},{"raw_affiliation_string":"Fusion Innovation Center, Institute of Automation, Chinese Academy of Sciences School of Artificial Intelligence, University of Chinese Academy of Sciences, Beijing, China","institution_ids":["https://openalex.org/I4210112150","https://openalex.org/I4210165038"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5101572310","display_name":"Guoliang Fan","orcid":"https://orcid.org/0000-0003-2724-2432"},"institutions":[{"id":"https://openalex.org/I4210100255","display_name":"Beijing Academy of Artificial Intelligence","ror":"https://ror.org/016a74861","country_code":"CN","type":"other","lineage":["https://openalex.org/I4210100255"]},{"id":"https://openalex.org/I4210112150","display_name":"Institute of Automation","ror":"https://ror.org/022c3hy66","country_code":"CN","type":"facility","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210112150"]},{"id":"https://openalex.org/I4210165038","display_name":"University of Chinese Academy of Sciences","ror":"https://ror.org/05qbk4x57","country_code":"CN","type":"education","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210165038"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Guoliang Fan","raw_affiliation_strings":["Fusion Innovation Center, Institute of Automation, Chinese Academy of Sciences School of Artificial Intelligence, University of Chinese Academy of Sciences,Beijing,China","Fusion Innovation Center, Institute of Automation, Chinese Academy of Sciences School of Artificial Intelligence, University of Chinese Academy of Sciences, Beijing, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Fusion Innovation Center, Institute of Automation, Chinese Academy of Sciences School of Artificial Intelligence, University of Chinese Academy of Sciences,Beijing,China","institution_ids":["https://openalex.org/I4210112150","https://openalex.org/I4210100255"]},{"raw_affiliation_string":"Fusion Innovation Center, Institute of Automation, Chinese Academy of Sciences School of Artificial Intelligence, University of Chinese Academy of Sciences, Beijing, China","institution_ids":["https://openalex.org/I4210112150","https://openalex.org/I4210165038"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":1.1195,"has_fulltext":false,"cited_by_count":9,"citation_normalized_percentile":{"value":0.82534438,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":97},"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"7"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9983999729156494,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9983999729156494,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11099","display_name":"Autonomous Vehicle Technology and Safety","score":0.9401999711990356,"subfield":{"id":"https://openalex.org/subfields/2203","display_name":"Automotive Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11975","display_name":"Evolutionary Algorithms and Applications","score":0.930400013923645,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.7788615822792053},{"id":"https://openalex.org/keywords/randomness","display_name":"Randomness","score":0.7012311816215515},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6087948679924011},{"id":"https://openalex.org/keywords/decomposition","display_name":"Decomposition","score":0.5480329394340515},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.532965898513794},{"id":"https://openalex.org/keywords/value","display_name":"Value (mathematics)","score":0.5102636814117432},{"id":"https://openalex.org/keywords/class","display_name":"Class (philosophy)","score":0.4782354235649109},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.3231552243232727},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.23591041564941406},{"id":"https://openalex.org/keywords/statistics","display_name":"Statistics","score":0.15407618880271912}],"concepts":[{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.7788615822792053},{"id":"https://openalex.org/C125112378","wikidata":"https://www.wikidata.org/wiki/Q176640","display_name":"Randomness","level":2,"score":0.7012311816215515},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6087948679924011},{"id":"https://openalex.org/C124681953","wikidata":"https://www.wikidata.org/wiki/Q339062","display_name":"Decomposition","level":2,"score":0.5480329394340515},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.532965898513794},{"id":"https://openalex.org/C2776291640","wikidata":"https://www.wikidata.org/wiki/Q2912517","display_name":"Value (mathematics)","level":2,"score":0.5102636814117432},{"id":"https://openalex.org/C2777212361","wikidata":"https://www.wikidata.org/wiki/Q5127848","display_name":"Class (philosophy)","level":2,"score":0.4782354235649109},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.3231552243232727},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.23591041564941406},{"id":"https://openalex.org/C105795698","wikidata":"https://www.wikidata.org/wiki/Q12483","display_name":"Statistics","level":1,"score":0.15407618880271912},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0},{"id":"https://openalex.org/C18903297","wikidata":"https://www.wikidata.org/wiki/Q7150","display_name":"Ecology","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/ijcnn52387.2021.9533636","is_oa":false,"landing_page_url":"https://doi.org/10.1109/ijcnn52387.2021.9533636","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2021 International Joint Conference on Neural Networks (IJCNN)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":69,"referenced_works":["https://openalex.org/W1487641199","https://openalex.org/W1757796397","https://openalex.org/W1959608418","https://openalex.org/W2099471712","https://openalex.org/W2121863487","https://openalex.org/W2125865219","https://openalex.org/W2212660284","https://openalex.org/W2395575420","https://openalex.org/W2617322972","https://openalex.org/W2617547828","https://openalex.org/W2626637010","https://openalex.org/W2747213132","https://openalex.org/W2756196406","https://openalex.org/W2765302304","https://openalex.org/W2803155336","https://openalex.org/W2803308811","https://openalex.org/W2894976951","https://openalex.org/W2907606902","https://openalex.org/W2946606218","https://openalex.org/W2949963774","https://openalex.org/W2950292946","https://openalex.org/W2951984055","https://openalex.org/W2962892300","https://openalex.org/W2962917939","https://openalex.org/W2962938168","https://openalex.org/W2962966033","https://openalex.org/W2963000099","https://openalex.org/W2963407617","https://openalex.org/W2963423916","https://openalex.org/W2963717208","https://openalex.org/W2963757175","https://openalex.org/W2964338167","https://openalex.org/W2970036354","https://openalex.org/W2981030070","https://openalex.org/W2983617607","https://openalex.org/W3009584650","https://openalex.org/W3034607397","https://openalex.org/W3045080532","https://openalex.org/W3093287223","https://openalex.org/W4288594419","https://openalex.org/W4288601262","https://openalex.org/W4295598622","https://openalex.org/W4298857966","https://openalex.org/W4298876402","https://openalex.org/W4299802797","https://openalex.org/W4320013936","https://openalex.org/W6629354409","https://openalex.org/W6637967152","https://openalex.org/W6640963894","https://openalex.org/W6678814708","https://openalex.org/W6683300800","https://openalex.org/W6688325169","https://openalex.org/W6712181171","https://openalex.org/W6713411898","https://openalex.org/W6737965738","https://openalex.org/W6738796088","https://openalex.org/W6739516088","https://openalex.org/W6744537943","https://openalex.org/W6749304979","https://openalex.org/W6751139674","https://openalex.org/W6751629939","https://openalex.org/W6755069753","https://openalex.org/W6757781149","https://openalex.org/W6758846586","https://openalex.org/W6762491519","https://openalex.org/W6767112054","https://openalex.org/W6774583691","https://openalex.org/W6781702133","https://openalex.org/W6864424756"],"related_works":["https://openalex.org/W3034924094","https://openalex.org/W3094954546","https://openalex.org/W1488708774","https://openalex.org/W1982811510","https://openalex.org/W4391100477","https://openalex.org/W2402189625","https://openalex.org/W4327779705","https://openalex.org/W4310560702","https://openalex.org/W1513698804","https://openalex.org/W2029712093"],"abstract_inverted_index":{"In":[0],"the":[1,15,49,79,82,99,127,138],"real":[2],"world,":[3],"many":[4,24],"tasks":[5],"require":[6],"multiple":[7],"agents":[8],"to":[9,64,97,103,122],"cooperate":[10],"with":[11,33],"each":[12],"other":[13],"under":[14],"condition":[16],"of":[17,42],"local":[18],"observations.":[19],"To":[20],"solve":[21],"such":[22],"problems,":[23],"multi-agent":[25],"reinforcement":[26,92],"learning":[27,93],"methods":[28],"based":[29],"on":[30],"Centralized":[31],"Training":[32],"Decentralized":[34],"Execution":[35],"have":[36],"been":[37],"proposed.":[38],"One":[39],"representative":[40],"class":[41],"work":[43],"is":[44,117],"value":[45,95],"decomposition,":[46],"which":[47,116],"decomposes":[48],"global":[50],"joint":[51],"Q-value":[52],"Q":[53,60],"<inf":[54,61],"xmlns:mml=\"http://www.w3.org/1998/Math/MathML\"":[55,62],"xmlns:xlink=\"http://www.w3.org/1999/xlink\">jt</inf>":[56],"into":[57,126],"individual":[58],"Q-values":[59],"xmlns:xlink=\"http://www.w3.org/1999/xlink\">a</inf>":[63],"guide":[65],"individuals'":[66],"behaviors,":[67],"e.g.":[68],"VDN":[69],"(Value-Decomposition":[70],"Networks)":[71],"and":[72,94],"QMIX.":[73],"However,":[74],"these":[75],"baselines":[76,136],"often":[77],"ignore":[78],"randomness":[80,125],"in":[81,137],"situation.":[83],"We":[84],"propose":[85],"MMD-MIX,":[86],"a":[87,118],"method":[88],"that":[89,132],"combines":[90],"distributional":[91],"decomposition":[96],"alleviate":[98],"above":[100],"weaknesses.":[101],"Besides,":[102],"improve":[104],"data":[105],"sampling":[106],"efficiency,":[107],"we":[108],"were":[109],"inspired":[110],"by":[111],"REM":[112],"(Random":[113],"Ensemble":[114],"Mixture)":[115],"robust":[119],"RL":[120],"algorithm":[121],"explicitly":[123],"introduce":[124],"MMD-MIX.":[128],"The":[129],"experiments":[130],"demonstrate":[131],"MMD-MIX":[133],"outperforms":[134],"prior":[135],"StarCraft":[139],"Multi-Agent":[140],"Challenge":[141],"(SMAC)":[142],"environment.":[143]},"counts_by_year":[{"year":2025,"cited_by_count":1},{"year":2024,"cited_by_count":3},{"year":2023,"cited_by_count":1},{"year":2022,"cited_by_count":4}],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2025-10-10T00:00:00"}
