{"id":"https://openalex.org/W3167061096","doi":"https://doi.org/10.1145/3447548.3467420","title":"Shapley Counterfactual Credits for Multi-Agent Reinforcement Learning","display_name":"Shapley Counterfactual Credits for Multi-Agent Reinforcement Learning","publication_year":2021,"publication_date":"2021-08-12","ids":{"openalex":"https://openalex.org/W3167061096","doi":"https://doi.org/10.1145/3447548.3467420","mag":"3167061096"},"language":"en","primary_location":{"id":"doi:10.1145/3447548.3467420","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3447548.3467420","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 27th ACM SIGKDD Conference on Knowledge Discovery &amp; Data Mining","raw_type":"proceedings-article"},"type":"preprint","indexed_in":["arxiv","crossref"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/2106.00285","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5100462974","display_name":"Jiahui Li","orcid":"https://orcid.org/0009-0001-8621-8849"},"institutions":[{"id":"https://openalex.org/I76130692","display_name":"Zhejiang University","ror":"https://ror.org/00a2xv884","country_code":"CN","type":"education","lineage":["https://openalex.org/I76130692"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jiahui Li","raw_affiliation_strings":["Zhejiang University, Hangzhou, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Zhejiang University, Hangzhou, China","institution_ids":["https://openalex.org/I76130692"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5041727387","display_name":"Kun Kuang","orcid":"https://orcid.org/0000-0001-7024-9790"},"institutions":[{"id":"https://openalex.org/I76130692","display_name":"Zhejiang University","ror":"https://ror.org/00a2xv884","country_code":"CN","type":"education","lineage":["https://openalex.org/I76130692"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Kun Kuang","raw_affiliation_strings":["Zhejiang University, Hangzhou, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Zhejiang University, Hangzhou, China","institution_ids":["https://openalex.org/I76130692"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101781010","display_name":"Baoxiang Wang","orcid":"https://orcid.org/0000-0002-6569-9049"},"institutions":[{"id":"https://openalex.org/I4210116924","display_name":"Chinese University of Hong Kong, Shenzhen","ror":"https://ror.org/02d5ks197","country_code":"CN","type":"education","lineage":["https://openalex.org/I177725633","https://openalex.org/I180726961","https://openalex.org/I4210116924"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Baoxiang Wang","raw_affiliation_strings":["The Chinese University of Hong Kong, Shenzhen &amp; Shenzhen Institute of Artificial Intelligence and Robotics for Society, Shenzhen, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"The Chinese University of Hong Kong, Shenzhen &amp; Shenzhen Institute of Artificial Intelligence and Robotics for Society, Shenzhen, China","institution_ids":["https://openalex.org/I4210116924"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5026553094","display_name":"Furui Liu","orcid":"https://orcid.org/0000-0003-3997-3822"},"institutions":[{"id":"https://openalex.org/I2250955327","display_name":"Huawei Technologies (China)","ror":"https://ror.org/00cmhce21","country_code":"CN","type":"company","lineage":["https://openalex.org/I2250955327"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Furui Liu","raw_affiliation_strings":["Huawei Noah's Ark Lab, Shenzhen, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Huawei Noah's Ark Lab, Shenzhen, China","institution_ids":["https://openalex.org/I2250955327"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100336360","display_name":"Long Chen","orcid":"https://orcid.org/0000-0001-6148-9709"},"institutions":[{"id":"https://openalex.org/I76130692","display_name":"Zhejiang University","ror":"https://ror.org/00a2xv884","country_code":"CN","type":"education","lineage":["https://openalex.org/I76130692"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Long Chen","raw_affiliation_strings":["Zhejiang University, Hangzhou, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Zhejiang University, Hangzhou, China","institution_ids":["https://openalex.org/I76130692"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5004882141","display_name":"Fei Wu","orcid":"https://orcid.org/0000-0003-2139-8807"},"institutions":[{"id":"https://openalex.org/I76130692","display_name":"Zhejiang University","ror":"https://ror.org/00a2xv884","country_code":"CN","type":"education","lineage":["https://openalex.org/I76130692"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Fei Wu","raw_affiliation_strings":["Zhejiang University, Hangzhou, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Zhejiang University, Hangzhou, China","institution_ids":["https://openalex.org/I76130692"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5101485989","display_name":"Jun Xiao","orcid":"https://orcid.org/0000-0002-6142-9914"},"institutions":[{"id":"https://openalex.org/I76130692","display_name":"Zhejiang University","ror":"https://ror.org/00a2xv884","country_code":"CN","type":"education","lineage":["https://openalex.org/I76130692"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jun Xiao","raw_affiliation_strings":["Zhejiang University, Hangzhou, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Zhejiang University, Hangzhou, China","institution_ids":["https://openalex.org/I76130692"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":7,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":4.4771,"has_fulltext":false,"cited_by_count":50,"citation_normalized_percentile":{"value":0.95349763,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":94,"max":100},"biblio":{"volume":null,"issue":null,"first_page":"934","last_page":"942"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11689","display_name":"Adversarial Robustness in Machine Learning","score":0.9868999719619751,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12026","display_name":"Explainable Artificial Intelligence (XAI)","score":0.9355000257492065,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/counterfactual-thinking","display_name":"Counterfactual thinking","score":0.8289433717727661},{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.7765517830848694},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7332339882850647},{"id":"https://openalex.org/keywords/function","display_name":"Function (biology)","score":0.5830522179603577},{"id":"https://openalex.org/keywords/shapley-value","display_name":"Shapley value","score":0.4805048704147339},{"id":"https://openalex.org/keywords/process","display_name":"Process (computing)","score":0.42409634590148926},{"id":"https://openalex.org/keywords/value","display_name":"Value (mathematics)","score":0.4170180559158325},{"id":"https://openalex.org/keywords/mathematical-optimization","display_name":"Mathematical optimization","score":0.4143916964530945},{"id":"https://openalex.org/keywords/bellman-equation","display_name":"Bellman equation","score":0.4139885902404785},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.37532898783683777},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.32755887508392334},{"id":"https://openalex.org/keywords/mathematical-economics","display_name":"Mathematical economics","score":0.16013413667678833},{"id":"https://openalex.org/keywords/economics","display_name":"Economics","score":0.12059754133224487},{"id":"https://openalex.org/keywords/game-theory","display_name":"Game theory","score":0.1159733235836029},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.10572642087936401}],"concepts":[{"id":"https://openalex.org/C108650721","wikidata":"https://www.wikidata.org/wiki/Q1783253","display_name":"Counterfactual thinking","level":2,"score":0.8289433717727661},{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.7765517830848694},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7332339882850647},{"id":"https://openalex.org/C14036430","wikidata":"https://www.wikidata.org/wiki/Q3736076","display_name":"Function (biology)","level":2,"score":0.5830522179603577},{"id":"https://openalex.org/C199022921","wikidata":"https://www.wikidata.org/wiki/Q240046","display_name":"Shapley value","level":3,"score":0.4805048704147339},{"id":"https://openalex.org/C98045186","wikidata":"https://www.wikidata.org/wiki/Q205663","display_name":"Process (computing)","level":2,"score":0.42409634590148926},{"id":"https://openalex.org/C2776291640","wikidata":"https://www.wikidata.org/wiki/Q2912517","display_name":"Value (mathematics)","level":2,"score":0.4170180559158325},{"id":"https://openalex.org/C126255220","wikidata":"https://www.wikidata.org/wiki/Q141495","display_name":"Mathematical optimization","level":1,"score":0.4143916964530945},{"id":"https://openalex.org/C14646407","wikidata":"https://www.wikidata.org/wiki/Q1430750","display_name":"Bellman equation","level":2,"score":0.4139885902404785},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.37532898783683777},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.32755887508392334},{"id":"https://openalex.org/C144237770","wikidata":"https://www.wikidata.org/wiki/Q747534","display_name":"Mathematical economics","level":1,"score":0.16013413667678833},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.12059754133224487},{"id":"https://openalex.org/C177142836","wikidata":"https://www.wikidata.org/wiki/Q44455","display_name":"Game theory","level":2,"score":0.1159733235836029},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.10572642087936401},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0},{"id":"https://openalex.org/C78458016","wikidata":"https://www.wikidata.org/wiki/Q840400","display_name":"Evolutionary biology","level":1,"score":0.0},{"id":"https://openalex.org/C111472728","wikidata":"https://www.wikidata.org/wiki/Q9471","display_name":"Epistemology","level":1,"score":0.0},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0}],"mesh":[],"locations_count":3,"locations":[{"id":"doi:10.1145/3447548.3467420","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3447548.3467420","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 27th ACM SIGKDD Conference on Knowledge Discovery &amp; Data Mining","raw_type":"proceedings-article"},{"id":"pmh:oai:arXiv.org:2106.00285","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2106.00285","pdf_url":"https://arxiv.org/pdf/2106.00285","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},{"id":"pmh:oai:repository.hkust.edu.hk:1783.1-125284","is_oa":false,"landing_page_url":"http://gateway.isiknowledge.com/gateway/Gateway.cgi?GWVersion=2&SrcAuth=LinksAMR&SrcApp=PARTNER_APP&DestLinkType=FullRecord&DestApp=WOS&KeyUT=000749556800090","pdf_url":null,"source":{"id":"https://openalex.org/S4306401796","display_name":"Rare & Special e-Zone (The Hong Kong University of Science and Technology)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I200769079","host_organization_name":"Hong Kong University of Science and Technology","host_organization_lineage":["https://openalex.org/I200769079"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"Conference paper"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:2106.00285","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2106.00285","pdf_url":"https://arxiv.org/pdf/2106.00285","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},"sustainable_development_goals":[{"display_name":"Partnerships for the goals","score":0.5299999713897705,"id":"https://metadata.un.org/sdg/17"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":67,"referenced_works":["https://openalex.org/W1562353621","https://openalex.org/W1641379095","https://openalex.org/W1993411524","https://openalex.org/W2012812921","https://openalex.org/W2026662445","https://openalex.org/W2088956500","https://openalex.org/W2099618002","https://openalex.org/W2114225169","https://openalex.org/W2122800638","https://openalex.org/W2136112566","https://openalex.org/W2137647991","https://openalex.org/W2165150801","https://openalex.org/W2254620049","https://openalex.org/W2292533394","https://openalex.org/W2436401953","https://openalex.org/W2617547828","https://openalex.org/W2622408375","https://openalex.org/W2623431351","https://openalex.org/W2734878594","https://openalex.org/W2747213132","https://openalex.org/W2768629321","https://openalex.org/W2785315072","https://openalex.org/W2807741983","https://openalex.org/W2894976951","https://openalex.org/W2912083425","https://openalex.org/W2924181074","https://openalex.org/W2939984132","https://openalex.org/W2949574275","https://openalex.org/W2949963774","https://openalex.org/W2951984055","https://openalex.org/W2962821147","https://openalex.org/W2962938168","https://openalex.org/W2962966033","https://openalex.org/W2963039558","https://openalex.org/W2963067999","https://openalex.org/W2963407617","https://openalex.org/W2963485523","https://openalex.org/W2963864421","https://openalex.org/W2964043796","https://openalex.org/W2972122474","https://openalex.org/W2987123286","https://openalex.org/W3004640943","https://openalex.org/W3004732081","https://openalex.org/W3007233875","https://openalex.org/W3007549203","https://openalex.org/W3034236692","https://openalex.org/W3034764457","https://openalex.org/W3035235410","https://openalex.org/W3039208705","https://openalex.org/W3093287223","https://openalex.org/W3097431078","https://openalex.org/W3100573319","https://openalex.org/W3102440254","https://openalex.org/W3102824929","https://openalex.org/W3104860527","https://openalex.org/W3116073702","https://openalex.org/W3117280628","https://openalex.org/W3187997433","https://openalex.org/W4287864753","https://openalex.org/W4288092767","https://openalex.org/W4288594419","https://openalex.org/W4289704137","https://openalex.org/W4295267831","https://openalex.org/W4295598622","https://openalex.org/W4299802797","https://openalex.org/W4302570325","https://openalex.org/W6600135713"],"related_works":["https://openalex.org/W3201448254","https://openalex.org/W4286970243","https://openalex.org/W2066431708","https://openalex.org/W4384133558","https://openalex.org/W3025615835","https://openalex.org/W173210993","https://openalex.org/W2386410636","https://openalex.org/W3038962357","https://openalex.org/W2025663273","https://openalex.org/W3099153698"],"abstract_inverted_index":{"Centralized":[0],"Training":[1],"with":[2,224,230],"Decentralized":[3],"Execution":[4],"(CTDE)":[5],"has":[6],"been":[7],"a":[8,85,114],"popular":[9],"paradigm":[10],"in":[11,22,31,137,167],"cooperative":[12,216],"Multi-Agent":[13],"Reinforcement":[14],"Learning":[15],"(MARL)":[16],"settings":[17],"and":[18,75,102,131,220],"is":[19,35],"widely":[20],"used":[21],"many":[23],"real":[24],"applications.":[25],"One":[26],"of":[27,44,72,88,100,126,144,171,180],"the":[28,32,42,49,60,70,78,89,124,149,153,162,168,178,194,222],"major":[29],"challenges":[30],"training":[33],"process":[34],"credit":[36,53,101,119,141,155],"assignment,":[37],"which":[38,121,146,192],"aims":[39],"to":[40,48,96,140,151],"deduce":[41],"contributions":[43],"each":[45,157],"agent":[46],"according":[47],"global":[50,79],"rewards.":[51],"Existing":[52],"assignment":[54,99,120],"methods":[55],"focus":[56],"on":[57,77,106,205,228],"either":[58],"decomposing":[59],"joint":[61],"value":[62,66,80],"function":[63],"into":[64],"individual":[65,154],"functions":[67],"or":[68],"measuring":[69],"impact":[71],"local":[73],"observations":[74],"actions":[76],"function.":[81],"These":[82],"approaches":[83],"lack":[84],"thorough":[86],"consideration":[87],"complicated":[90],"interactions":[91],"among":[92],"multiple":[93],"agents,":[94,145],"leading":[95],"an":[97,185],"unsuitable":[98],"subsequently":[103],"mediocre":[104],"results":[105],"MARL.":[107],"We":[108,182,201],"propose":[109],"Shapley":[110,129,172],"Counterfactual":[111],"Credit":[112],"Assignment,":[113],"novel":[115],"method":[116,187,204,213],"for":[117,123,156],"explicit":[118],"accounts":[122],"coalition":[125],"agents.":[127,181],"Specifically,":[128],"Value":[130,173],"its":[132,199],"desired":[133],"properties":[134],"are":[135],"leveraged":[136],"deep":[138],"MARL":[139,217],"any":[142],"combinations":[143],"grants":[147],"us":[148],"capability":[150],"estimate":[152],"agent.":[158],"Despite":[159],"this":[160],"capability,":[161],"main":[163],"technical":[164],"difficulty":[165],"lies":[166],"computational":[169],"complexity":[170,196],"who":[174],"grows":[175],"factorially":[176],"as":[177],"number":[179],"instead":[183],"utilize":[184],"approximation":[186],"via":[188],"Monte":[189],"Carlo":[190],"sampling,":[191],"reduces":[193],"sample":[195],"while":[197],"maintaining":[198],"effectiveness.":[200],"evaluate":[202],"our":[203],"StarCraft":[206],"II":[207],"benchmarks":[208],"across":[209],"different":[210],"scenarios.":[211],"Our":[212],"outperforms":[214],"existing":[215],"algorithms":[218],"significantly":[219],"achieves":[221],"state-of-the-art,":[223],"especially":[225],"large":[226],"margins":[227],"tasks":[229],"more":[231],"severe":[232],"difficulties.":[233]},"counts_by_year":[{"year":2026,"cited_by_count":2},{"year":2025,"cited_by_count":16},{"year":2024,"cited_by_count":15},{"year":2023,"cited_by_count":9},{"year":2022,"cited_by_count":6},{"year":2021,"cited_by_count":2}],"updated_date":"2026-06-19T17:40:00.097472","created_date":"2025-10-10T00:00:00"}
