{"id":"https://openalex.org/W4385627070","doi":"https://doi.org/10.1109/tg.2023.3302694","title":"Leveraging Joint-Action Embedding in Multiagent Reinforcement Learning for Cooperative Games","display_name":"Leveraging Joint-Action Embedding in Multiagent Reinforcement Learning for Cooperative Games","publication_year":2023,"publication_date":"2023-08-07","ids":{"openalex":"https://openalex.org/W4385627070","doi":"https://doi.org/10.1109/tg.2023.3302694"},"language":"en","primary_location":{"id":"doi:10.1109/tg.2023.3302694","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tg.2023.3302694","pdf_url":null,"source":{"id":"https://openalex.org/S4210224842","display_name":"IEEE Transactions on Games","issn_l":"2475-1502","issn":["2475-1502","2475-1510"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Games","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5086169822","display_name":"Xingzhou Lou","orcid":"https://orcid.org/0000-0001-6380-2818"},"institutions":[{"id":"https://openalex.org/I4210100255","display_name":"Beijing Academy of Artificial Intelligence","ror":"https://ror.org/016a74861","country_code":"CN","type":"other","lineage":["https://openalex.org/I4210100255"]},{"id":"https://openalex.org/I4210112150","display_name":"Institute of Automation","ror":"https://ror.org/022c3hy66","country_code":"CN","type":"facility","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210112150"]},{"id":"https://openalex.org/I4210165038","display_name":"University of Chinese Academy of Sciences","ror":"https://ror.org/05qbk4x57","country_code":"CN","type":"education","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210165038"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Xingzhou Lou","raw_affiliation_strings":["School of Artificial Intelligence, University of Chinese Academy of Sciences and Center for Research on Intelligent System and Engineering, Institute of Automation, Chinese Academy of Sciences, Beijing, China","School of Artificial Intelligence, University of Chinese Academy of Sciences, Beijing, China"],"affiliations":[{"raw_affiliation_string":"School of Artificial Intelligence, University of Chinese Academy of Sciences and Center for Research on Intelligent System and Engineering, Institute of Automation, Chinese Academy of Sciences, Beijing, China","institution_ids":["https://openalex.org/I4210112150","https://openalex.org/I4210165038"]},{"raw_affiliation_string":"School of Artificial Intelligence, University of Chinese Academy of Sciences, Beijing, China","institution_ids":["https://openalex.org/I4210100255","https://openalex.org/I4210165038"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5109012644","display_name":"Junge Zhang","orcid":"https://orcid.org/0000-0002-9970-394X"},"institutions":[{"id":"https://openalex.org/I4210100255","display_name":"Beijing Academy of Artificial Intelligence","ror":"https://ror.org/016a74861","country_code":"CN","type":"other","lineage":["https://openalex.org/I4210100255"]},{"id":"https://openalex.org/I4210112150","display_name":"Institute of Automation","ror":"https://ror.org/022c3hy66","country_code":"CN","type":"facility","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210112150"]},{"id":"https://openalex.org/I4210165038","display_name":"University of Chinese Academy of Sciences","ror":"https://ror.org/05qbk4x57","country_code":"CN","type":"education","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210165038"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Junge Zhang","raw_affiliation_strings":["School of Artificial Intelligence, University of Chinese Academy of Sciences and Center for Research on Intelligent System and Engineering, Institute of Automation, Chinese Academy of Sciences, Beijing, China","School of Artificial Intelligence, University of Chinese Academy of Sciences, Beijing, China"],"affiliations":[{"raw_affiliation_string":"School of Artificial Intelligence, University of Chinese Academy of Sciences and Center for Research on Intelligent System and Engineering, Institute of Automation, Chinese Academy of Sciences, Beijing, China","institution_ids":["https://openalex.org/I4210112150","https://openalex.org/I4210165038"]},{"raw_affiliation_string":"School of Artificial Intelligence, University of Chinese Academy of Sciences, Beijing, China","institution_ids":["https://openalex.org/I4210100255","https://openalex.org/I4210165038"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5002080576","display_name":"Yali Du","orcid":"https://orcid.org/0000-0001-5683-2621"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yali Du","raw_affiliation_strings":["Department of Informatics, King&#x0027;s College London, London, U.K"],"affiliations":[{"raw_affiliation_string":"Department of Informatics, King&#x0027;s College London, London, U.K","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5074681163","display_name":"Chao Yu","orcid":"https://orcid.org/0000-0002-4371-3663"},"institutions":[{"id":"https://openalex.org/I157773358","display_name":"Sun Yat-sen University","ror":"https://ror.org/0064kty71","country_code":"CN","type":"education","lineage":["https://openalex.org/I157773358"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Chao Yu","raw_affiliation_strings":["School of Data and Computer Science, Sun Yat-sen University, Guangzhou, China"],"affiliations":[{"raw_affiliation_string":"School of Data and Computer Science, Sun Yat-sen University, Guangzhou, China","institution_ids":["https://openalex.org/I157773358"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5057318866","display_name":"Zhaofeng He","orcid":"https://orcid.org/0000-0002-3433-8435"},"institutions":[{"id":"https://openalex.org/I139759216","display_name":"Beijing University of Posts and Telecommunications","ror":"https://ror.org/04w9fbh59","country_code":"CN","type":"education","lineage":["https://openalex.org/I139759216"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Zhaofeng He","raw_affiliation_strings":["School of Artificial Intelligence, Beijing University of Posts and Telecommunications, Beijing, China"],"affiliations":[{"raw_affiliation_string":"School of Artificial Intelligence, Beijing University of Posts and Telecommunications, Beijing, China","institution_ids":["https://openalex.org/I139759216"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5028693655","display_name":"Kaiqi Huang","orcid":"https://orcid.org/0000-0002-2677-9273"},"institutions":[{"id":"https://openalex.org/I4210100255","display_name":"Beijing Academy of Artificial Intelligence","ror":"https://ror.org/016a74861","country_code":"CN","type":"other","lineage":["https://openalex.org/I4210100255"]},{"id":"https://openalex.org/I4210112150","display_name":"Institute of Automation","ror":"https://ror.org/022c3hy66","country_code":"CN","type":"facility","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210112150"]},{"id":"https://openalex.org/I4210165038","display_name":"University of Chinese Academy of Sciences","ror":"https://ror.org/05qbk4x57","country_code":"CN","type":"education","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210165038"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Kaiqi Huang","raw_affiliation_strings":["School of Artificial Intelligence, University of Chinese Academy of Sciences and Center for Research on Intelligent System and Engineering, Institute of Automation, Chinese Academy of Sciences, Beijing, China","School of Artificial Intelligence, University of Chinese Academy of Sciences, Beijing, China"],"affiliations":[{"raw_affiliation_string":"School of Artificial Intelligence, University of Chinese Academy of Sciences and Center for Research on Intelligent System and Engineering, Institute of Automation, Chinese Academy of Sciences, Beijing, China","institution_ids":["https://openalex.org/I4210112150","https://openalex.org/I4210165038"]},{"raw_affiliation_string":"School of Artificial Intelligence, University of Chinese Academy of Sciences, Beijing, China","institution_ids":["https://openalex.org/I4210100255","https://openalex.org/I4210165038"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5086169822"],"corresponding_institution_ids":["https://openalex.org/I4210100255","https://openalex.org/I4210112150","https://openalex.org/I4210165038"],"apc_list":null,"apc_paid":null,"fwci":0.6959,"has_fulltext":false,"cited_by_count":4,"citation_normalized_percentile":{"value":0.75658576,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":94,"max":96},"biblio":{"volume":"16","issue":"2","first_page":"470","last_page":"482"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.991599977016449,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.991599977016449,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/embedding","display_name":"Embedding","score":0.8315960168838501},{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.820164144039154},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7302481532096863},{"id":"https://openalex.org/keywords/generalization","display_name":"Generalization","score":0.707133412361145},{"id":"https://openalex.org/keywords/maximization","display_name":"Maximization","score":0.6844207644462585},{"id":"https://openalex.org/keywords/action","display_name":"Action (physics)","score":0.5988848209381104},{"id":"https://openalex.org/keywords/joint","display_name":"Joint (building)","score":0.5930736064910889},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5305790305137634},{"id":"https://openalex.org/keywords/function","display_name":"Function (biology)","score":0.4730207920074463},{"id":"https://openalex.org/keywords/mathematical-optimization","display_name":"Mathematical optimization","score":0.38271263241767883},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.3379100561141968},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.20922818779945374}],"concepts":[{"id":"https://openalex.org/C41608201","wikidata":"https://www.wikidata.org/wiki/Q980509","display_name":"Embedding","level":2,"score":0.8315960168838501},{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.820164144039154},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7302481532096863},{"id":"https://openalex.org/C177148314","wikidata":"https://www.wikidata.org/wiki/Q170084","display_name":"Generalization","level":2,"score":0.707133412361145},{"id":"https://openalex.org/C2776330181","wikidata":"https://www.wikidata.org/wiki/Q18358244","display_name":"Maximization","level":2,"score":0.6844207644462585},{"id":"https://openalex.org/C2780791683","wikidata":"https://www.wikidata.org/wiki/Q846785","display_name":"Action (physics)","level":2,"score":0.5988848209381104},{"id":"https://openalex.org/C18555067","wikidata":"https://www.wikidata.org/wiki/Q8375051","display_name":"Joint (building)","level":2,"score":0.5930736064910889},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5305790305137634},{"id":"https://openalex.org/C14036430","wikidata":"https://www.wikidata.org/wiki/Q3736076","display_name":"Function (biology)","level":2,"score":0.4730207920074463},{"id":"https://openalex.org/C126255220","wikidata":"https://www.wikidata.org/wiki/Q141495","display_name":"Mathematical optimization","level":1,"score":0.38271263241767883},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.3379100561141968},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.20922818779945374},{"id":"https://openalex.org/C134306372","wikidata":"https://www.wikidata.org/wiki/Q7754","display_name":"Mathematical analysis","level":1,"score":0.0},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.0},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C170154142","wikidata":"https://www.wikidata.org/wiki/Q150737","display_name":"Architectural engineering","level":1,"score":0.0},{"id":"https://openalex.org/C78458016","wikidata":"https://www.wikidata.org/wiki/Q840400","display_name":"Evolutionary biology","level":1,"score":0.0},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/tg.2023.3302694","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tg.2023.3302694","pdf_url":null,"source":{"id":"https://openalex.org/S4210224842","display_name":"IEEE Transactions on Games","issn_l":"2475-1502","issn":["2475-1502","2475-1510"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Games","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[{"score":0.46000000834465027,"id":"https://metadata.un.org/sdg/16","display_name":"Peace, Justice and strong institutions"}],"awards":[{"id":"https://openalex.org/G6431871145","display_name":null,"funder_award_id":"61721004","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"},{"id":"https://openalex.org/F4320322847","display_name":"Youth Innovation Promotion Association of the Chinese Academy of Sciences","ror":"https://ror.org/031141b54"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":84,"referenced_works":["https://openalex.org/W241919466","https://openalex.org/W1924770834","https://openalex.org/W2044212084","https://openalex.org/W2088956500","https://openalex.org/W2149350210","https://openalex.org/W2166533447","https://openalex.org/W2292533394","https://openalex.org/W2617547828","https://openalex.org/W2747213132","https://openalex.org/W2766447205","https://openalex.org/W2768636722","https://openalex.org/W2787666871","https://openalex.org/W2808602506","https://openalex.org/W2810602713","https://openalex.org/W2883725317","https://openalex.org/W2901024945","https://openalex.org/W2902174156","https://openalex.org/W2908895887","https://openalex.org/W2913906356","https://openalex.org/W2962764167","https://openalex.org/W2963146510","https://openalex.org/W2963890729","https://openalex.org/W2964179661","https://openalex.org/W2969195240","https://openalex.org/W2982316857","https://openalex.org/W2988595664","https://openalex.org/W2990500698","https://openalex.org/W2996037775","https://openalex.org/W2998718698","https://openalex.org/W3004640943","https://openalex.org/W3029584598","https://openalex.org/W3041202696","https://openalex.org/W3044976552","https://openalex.org/W3046288222","https://openalex.org/W3089778445","https://openalex.org/W3099518626","https://openalex.org/W3110876951","https://openalex.org/W3116531410","https://openalex.org/W3118881636","https://openalex.org/W3119176801","https://openalex.org/W3134226813","https://openalex.org/W3153676008","https://openalex.org/W3176265013","https://openalex.org/W3189092450","https://openalex.org/W3197590744","https://openalex.org/W3206495137","https://openalex.org/W4232613155","https://openalex.org/W4254751698","https://openalex.org/W4286902222","https://openalex.org/W4287757834","https://openalex.org/W4293862243","https://openalex.org/W4294170691","https://openalex.org/W4310705874","https://openalex.org/W4394672593","https://openalex.org/W6640212811","https://openalex.org/W6677939520","https://openalex.org/W6682889407","https://openalex.org/W6683195989","https://openalex.org/W6712181171","https://openalex.org/W6726754200","https://openalex.org/W6734678876","https://openalex.org/W6738796088","https://openalex.org/W6739516088","https://openalex.org/W6747837047","https://openalex.org/W6749304979","https://openalex.org/W6758076299","https://openalex.org/W6758641611","https://openalex.org/W6758846586","https://openalex.org/W6759315145","https://openalex.org/W6762491519","https://openalex.org/W6772005887","https://openalex.org/W6773620346","https://openalex.org/W6777981532","https://openalex.org/W6779547172","https://openalex.org/W6780768476","https://openalex.org/W6781750019","https://openalex.org/W6783196708","https://openalex.org/W6784046417","https://openalex.org/W6787618087","https://openalex.org/W6791533262","https://openalex.org/W6796436736","https://openalex.org/W6800915406","https://openalex.org/W6802513241","https://openalex.org/W6864350279"],"related_works":["https://openalex.org/W4377293004","https://openalex.org/W3022183679","https://openalex.org/W4206669594","https://openalex.org/W2961085424","https://openalex.org/W3037422413","https://openalex.org/W2959276766","https://openalex.org/W4295941380","https://openalex.org/W260766989","https://openalex.org/W3139193008","https://openalex.org/W4319083788"],"abstract_inverted_index":{"State-of-the-art":[0],"multi-agent":[1,139],"policy":[2,39],"gradient":[3,40],"(MAPG)":[4],"methods":[5,142],"have":[6],"demonstrated":[7],"convincing":[8],"capability":[9],"in":[10,160],"many":[11],"cooperative":[12],"games.":[13],"However,":[14],"the":[15,22,66,70,90,99,155],"exponentially":[16],"growing":[17],"joint-action":[18,43,53,71,87,131],"space":[19,72],"severely":[20],"challenges":[21],"critic's":[23,67],"value":[24],"evaluation":[25,108],"and":[26,46,113,143,165],"hinders":[27],"performance":[28],"of":[29,60,92,122,157],"MAPG":[30,50,116,145],"methods.":[31,146],"To":[32],"address":[33],"this":[34],"issue,":[35],"we":[36,148],"augment":[37],"Central-Q":[38],"with":[41,84,110],"a":[42,85],"embedding":[44,54,88,132],"function":[45,55],"propose":[47],"Mutual-information":[48],"Maximization":[49],"(M3APG).":[51],"The":[52],"makes":[56],"joint-actions":[57],"contain":[58],"information":[59,159],"state":[61],"transitions,":[62],"which":[63],"will":[64],"improve":[65],"generalization":[68],"over":[69],"by":[73],"allowing":[74],"it":[75],"to":[76,137,152],"infer":[77],"joint-actions'":[78],"outcomes.":[79],"We":[80,126],"theoretically":[81],"prove":[82],"that":[83,105,129,154],"fixed":[86],"function,":[89],"convergence":[91],"M3APG":[93,106],"is":[94,163],"guaranteed.":[95],"Experiment":[96],"results":[97,109],"on":[98],"StarCraft":[100],"Multi-Agent":[101],"Challenge":[102],"(SMAC)":[103],"demonstrate":[104],"gives":[107],"better":[111],"accuracy":[112],"outperform":[114],"other":[115],"basic":[117],"models":[118],"across":[119],"various":[120],"maps":[121],"multiple":[123],"difficulty":[124],"levels.":[125],"empirically":[127],"show":[128,153],"our":[130,161],"model":[133],"can":[134],"be":[135],"extended":[136],"value-based":[138],"reinforcement":[140],"learning":[141],"state-of-the-art":[144],"Finally,":[147],"run":[149],"ablation":[150],"study":[151],"usage":[156],"mutual":[158],"method":[162],"necessary":[164],"effective.":[166]},"counts_by_year":[{"year":2025,"cited_by_count":2},{"year":2024,"cited_by_count":2}],"updated_date":"2026-03-27T05:58:40.876381","created_date":"2025-10-10T00:00:00"}
