{"id":"https://openalex.org/W4401387035","doi":"https://doi.org/10.1109/tcss.2024.3428334","title":"Priority Over Quantity: A Self-Incentive Credit Assignment Scheme for Cooperative Multiagent Reinforcement Learning","display_name":"Priority Over Quantity: A Self-Incentive Credit Assignment Scheme for Cooperative Multiagent Reinforcement Learning","publication_year":2024,"publication_date":"2024-08-07","ids":{"openalex":"https://openalex.org/W4401387035","doi":"https://doi.org/10.1109/tcss.2024.3428334"},"language":"en","primary_location":{"id":"doi:10.1109/tcss.2024.3428334","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tcss.2024.3428334","pdf_url":null,"source":{"id":"https://openalex.org/S2490693980","display_name":"IEEE Transactions on Computational Social Systems","issn_l":"2329-924X","issn":["2329-924X","2373-7476"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Computational Social Systems","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5100703190","display_name":"Hao Tang","orcid":"https://orcid.org/0009-0006-6814-3456"},"institutions":[{"id":"https://openalex.org/I116953780","display_name":"Tongji University","ror":"https://ror.org/03rc6as71","country_code":"CN","type":"education","lineage":["https://openalex.org/I116953780"]},{"id":"https://openalex.org/I1327237609","display_name":"Ministry of Education of the People's Republic of China","ror":"https://ror.org/01mv9t934","country_code":"CN","type":"government","lineage":["https://openalex.org/I1327237609","https://openalex.org/I4210127390"]},{"id":"https://openalex.org/I4210100255","display_name":"Beijing Academy of Artificial Intelligence","ror":"https://ror.org/016a74861","country_code":"CN","type":"other","lineage":["https://openalex.org/I4210100255"]},{"id":"https://openalex.org/I4391012619","display_name":"Shanghai Artificial Intelligence Laboratory","ror":"https://ror.org/03wkvpx79","country_code":null,"type":"facility","lineage":["https://openalex.org/I4391012619"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Hao Tang","raw_affiliation_strings":["Department of Computer Science and Technology, Tongji University, Shanghai, China","Shanghai Artificial Intelligence Laboratory, Shanghai, China","Key Laboratory of Embedded System and Service Computing, Ministry of Education, Shanghai, China"],"affiliations":[{"raw_affiliation_string":"Department of Computer Science and Technology, Tongji University, Shanghai, China","institution_ids":["https://openalex.org/I116953780"]},{"raw_affiliation_string":"Shanghai Artificial Intelligence Laboratory, Shanghai, China","institution_ids":["https://openalex.org/I4210100255","https://openalex.org/I4391012619"]},{"raw_affiliation_string":"Key Laboratory of Embedded System and Service Computing, Ministry of Education, Shanghai, China","institution_ids":["https://openalex.org/I1327237609"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100417007","display_name":"Cheng Wang","orcid":"https://orcid.org/0000-0002-4752-0316"},"institutions":[{"id":"https://openalex.org/I116953780","display_name":"Tongji University","ror":"https://ror.org/03rc6as71","country_code":"CN","type":"education","lineage":["https://openalex.org/I116953780"]},{"id":"https://openalex.org/I1327237609","display_name":"Ministry of Education of the People's Republic of China","ror":"https://ror.org/01mv9t934","country_code":"CN","type":"government","lineage":["https://openalex.org/I1327237609","https://openalex.org/I4210127390"]},{"id":"https://openalex.org/I4210100255","display_name":"Beijing Academy of Artificial Intelligence","ror":"https://ror.org/016a74861","country_code":"CN","type":"other","lineage":["https://openalex.org/I4210100255"]},{"id":"https://openalex.org/I4391012619","display_name":"Shanghai Artificial Intelligence Laboratory","ror":"https://ror.org/03wkvpx79","country_code":null,"type":"facility","lineage":["https://openalex.org/I4391012619"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Cheng Wang","raw_affiliation_strings":["Department of Computer Science and Technology, Tongji University, Shanghai, China","Key Laboratory of Embedded System and Service Computing, Ministry of Education, Shanghai, China","Shanghai Artificial Intelligence Laboratory, Shanghai, China"],"affiliations":[{"raw_affiliation_string":"Department of Computer Science and Technology, Tongji University, Shanghai, China","institution_ids":["https://openalex.org/I116953780"]},{"raw_affiliation_string":"Key Laboratory of Embedded System and Service Computing, Ministry of Education, Shanghai, China","institution_ids":["https://openalex.org/I1327237609"]},{"raw_affiliation_string":"Shanghai Artificial Intelligence Laboratory, Shanghai, China","institution_ids":["https://openalex.org/I4210100255","https://openalex.org/I4391012619"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5079539016","display_name":"Shengbo Chang","orcid":null},"institutions":[{"id":"https://openalex.org/I116953780","display_name":"Tongji University","ror":"https://ror.org/03rc6as71","country_code":"CN","type":"education","lineage":["https://openalex.org/I116953780"]},{"id":"https://openalex.org/I1327237609","display_name":"Ministry of Education of the People's Republic of China","ror":"https://ror.org/01mv9t934","country_code":"CN","type":"government","lineage":["https://openalex.org/I1327237609","https://openalex.org/I4210127390"]},{"id":"https://openalex.org/I4210100255","display_name":"Beijing Academy of Artificial Intelligence","ror":"https://ror.org/016a74861","country_code":"CN","type":"other","lineage":["https://openalex.org/I4210100255"]},{"id":"https://openalex.org/I4391012619","display_name":"Shanghai Artificial Intelligence Laboratory","ror":"https://ror.org/03wkvpx79","country_code":null,"type":"facility","lineage":["https://openalex.org/I4391012619"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Shengbo Chang","raw_affiliation_strings":["Department of Computer Science and Technology, Tongji University, Shanghai, China","Shanghai Artificial Intelligence Laboratory, Shanghai, China","Key Laboratory of Embedded System and Service Computing, Ministry of Education, Shanghai, China"],"affiliations":[{"raw_affiliation_string":"Department of Computer Science and Technology, Tongji University, Shanghai, China","institution_ids":["https://openalex.org/I116953780"]},{"raw_affiliation_string":"Shanghai Artificial Intelligence Laboratory, Shanghai, China","institution_ids":["https://openalex.org/I4210100255","https://openalex.org/I4391012619"]},{"raw_affiliation_string":"Key Laboratory of Embedded System and Service Computing, Ministry of Education, Shanghai, China","institution_ids":["https://openalex.org/I1327237609"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5100648251","display_name":"Junqi Zhang","orcid":"https://orcid.org/0000-0002-4465-5880"},"institutions":[{"id":"https://openalex.org/I116953780","display_name":"Tongji University","ror":"https://ror.org/03rc6as71","country_code":"CN","type":"education","lineage":["https://openalex.org/I116953780"]},{"id":"https://openalex.org/I1327237609","display_name":"Ministry of Education of the People's Republic of China","ror":"https://ror.org/01mv9t934","country_code":"CN","type":"government","lineage":["https://openalex.org/I1327237609","https://openalex.org/I4210127390"]},{"id":"https://openalex.org/I4210100255","display_name":"Beijing Academy of Artificial Intelligence","ror":"https://ror.org/016a74861","country_code":"CN","type":"other","lineage":["https://openalex.org/I4210100255"]},{"id":"https://openalex.org/I4391012619","display_name":"Shanghai Artificial Intelligence Laboratory","ror":"https://ror.org/03wkvpx79","country_code":null,"type":"facility","lineage":["https://openalex.org/I4391012619"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Junqi Zhang","raw_affiliation_strings":["Department of Computer Science and Technology, Tongji University, Shanghai, China","Shanghai Artificial Intelligence Laboratory, Shanghai, China","Key Laboratory of Embedded System and Service Computing, Ministry of Education, Shanghai, China"],"affiliations":[{"raw_affiliation_string":"Department of Computer Science and Technology, Tongji University, Shanghai, China","institution_ids":["https://openalex.org/I116953780"]},{"raw_affiliation_string":"Shanghai Artificial Intelligence Laboratory, Shanghai, China","institution_ids":["https://openalex.org/I4210100255","https://openalex.org/I4391012619"]},{"raw_affiliation_string":"Key Laboratory of Embedded System and Service Computing, Ministry of Education, Shanghai, China","institution_ids":["https://openalex.org/I1327237609"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5100703190"],"corresponding_institution_ids":["https://openalex.org/I116953780","https://openalex.org/I1327237609","https://openalex.org/I4210100255","https://openalex.org/I4391012619"],"apc_list":null,"apc_paid":null,"fwci":1.963,"has_fulltext":false,"cited_by_count":4,"citation_normalized_percentile":{"value":0.86904414,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":97,"max":98},"biblio":{"volume":"11","issue":"6","first_page":"7766","last_page":"7777"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11182","display_name":"Auction Theory and Applications","score":0.8327999711036682,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},"topics":[{"id":"https://openalex.org/T11182","display_name":"Auction Theory and Applications","score":0.8327999711036682,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T11942","display_name":"Transportation and Mobility Innovations","score":0.7080000042915344,"subfield":{"id":"https://openalex.org/subfields/2203","display_name":"Automotive Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10270","display_name":"Blockchain Technology Applications and Security","score":0.7075999975204468,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.8506418466567993},{"id":"https://openalex.org/keywords/incentive","display_name":"Incentive","score":0.7663137912750244},{"id":"https://openalex.org/keywords/scheme","display_name":"Scheme (mathematics)","score":0.6076595187187195},{"id":"https://openalex.org/keywords/reinforcement","display_name":"Reinforcement","score":0.5642945170402527},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.5377703905105591},{"id":"https://openalex.org/keywords/multi-agent-system","display_name":"Multi-agent system","score":0.518168568611145},{"id":"https://openalex.org/keywords/q-learning","display_name":"Q-learning","score":0.44916871190071106},{"id":"https://openalex.org/keywords/computer-security","display_name":"Computer security","score":0.3572271764278412},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.32948821783065796},{"id":"https://openalex.org/keywords/microeconomics","display_name":"Microeconomics","score":0.259151816368103},{"id":"https://openalex.org/keywords/engineering","display_name":"Engineering","score":0.18723425269126892},{"id":"https://openalex.org/keywords/economics","display_name":"Economics","score":0.14507120847702026},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.0845479667186737}],"concepts":[{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.8506418466567993},{"id":"https://openalex.org/C29122968","wikidata":"https://www.wikidata.org/wiki/Q1414816","display_name":"Incentive","level":2,"score":0.7663137912750244},{"id":"https://openalex.org/C77618280","wikidata":"https://www.wikidata.org/wiki/Q1155772","display_name":"Scheme (mathematics)","level":2,"score":0.6076595187187195},{"id":"https://openalex.org/C67203356","wikidata":"https://www.wikidata.org/wiki/Q1321905","display_name":"Reinforcement","level":2,"score":0.5642945170402527},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.5377703905105591},{"id":"https://openalex.org/C41550386","wikidata":"https://www.wikidata.org/wiki/Q529909","display_name":"Multi-agent system","level":2,"score":0.518168568611145},{"id":"https://openalex.org/C188116033","wikidata":"https://www.wikidata.org/wiki/Q2664563","display_name":"Q-learning","level":3,"score":0.44916871190071106},{"id":"https://openalex.org/C38652104","wikidata":"https://www.wikidata.org/wiki/Q3510521","display_name":"Computer security","level":1,"score":0.3572271764278412},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.32948821783065796},{"id":"https://openalex.org/C175444787","wikidata":"https://www.wikidata.org/wiki/Q39072","display_name":"Microeconomics","level":1,"score":0.259151816368103},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.18723425269126892},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.14507120847702026},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.0845479667186737},{"id":"https://openalex.org/C66938386","wikidata":"https://www.wikidata.org/wiki/Q633538","display_name":"Structural engineering","level":1,"score":0.0},{"id":"https://openalex.org/C134306372","wikidata":"https://www.wikidata.org/wiki/Q7754","display_name":"Mathematical analysis","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/tcss.2024.3428334","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tcss.2024.3428334","pdf_url":null,"source":{"id":"https://openalex.org/S2490693980","display_name":"IEEE Transactions on Computational Social Systems","issn_l":"2329-924X","issn":["2329-924X","2373-7476"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Computational Social Systems","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G1012616008","display_name":null,"funder_award_id":"62372328","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G1395231520","display_name":null,"funder_award_id":"22XD1423700","funder_id":"https://openalex.org/F4320335796","funder_display_name":"Program of Shanghai Academic Research Leader"},{"id":"https://openalex.org/G2999259666","display_name":null,"funder_award_id":"22120240357","funder_id":"https://openalex.org/F4320335787","funder_display_name":"Fundamental Research Funds for the Central Universities"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"},{"id":"https://openalex.org/F4320335787","display_name":"Fundamental Research Funds for the Central Universities","ror":null},{"id":"https://openalex.org/F4320335796","display_name":"Program of Shanghai Academic Research Leader","ror":null}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":40,"referenced_works":["https://openalex.org/W41554520","https://openalex.org/W1641379095","https://openalex.org/W2076337359","https://openalex.org/W2104689655","https://openalex.org/W2145339207","https://openalex.org/W2292533394","https://openalex.org/W2588790649","https://openalex.org/W2617547828","https://openalex.org/W2626637010","https://openalex.org/W2746553466","https://openalex.org/W2747213132","https://openalex.org/W2968764495","https://openalex.org/W2972122474","https://openalex.org/W2981038142","https://openalex.org/W2991046523","https://openalex.org/W3102824929","https://openalex.org/W3157603151","https://openalex.org/W3188541024","https://openalex.org/W3209177686","https://openalex.org/W4220758675","https://openalex.org/W4225665995","https://openalex.org/W4385062434","https://openalex.org/W4389170002","https://openalex.org/W4399346003","https://openalex.org/W6638018090","https://openalex.org/W6676056555","https://openalex.org/W6677939520","https://openalex.org/W6683300800","https://openalex.org/W6721101288","https://openalex.org/W6739516088","https://openalex.org/W6747146101","https://openalex.org/W6749304979","https://openalex.org/W6758846586","https://openalex.org/W6762491519","https://openalex.org/W6767327128","https://openalex.org/W6773690109","https://openalex.org/W6781750019","https://openalex.org/W6802002411","https://openalex.org/W6840380725","https://openalex.org/W6849054939"],"related_works":["https://openalex.org/W3096874164","https://openalex.org/W2166117066","https://openalex.org/W2357975469","https://openalex.org/W2136202932","https://openalex.org/W3087814763","https://openalex.org/W4400868993","https://openalex.org/W2361647908","https://openalex.org/W2937181779","https://openalex.org/W2537866915","https://openalex.org/W2089415692"],"abstract_inverted_index":{"Centralized":[0],"training":[1],"and":[2,14,108,140,163],"decentralized":[3],"execution":[4],"(CTDE)":[5],"paradigm":[6,34],"is":[7,35],"widely":[8,180],"employed":[9],"to":[10,69],"address":[11],"the":[12,24,29,32,91,105,110,124,127,142,160,168,179,188,201],"nonstationary":[13],"partial":[15],"observability":[16],"in":[17,156],"multiagent":[18,184],"reinforcement":[19],"learning":[20,161],"(MARL).":[21],"One":[22],"of":[23,31,93,101,126,158,178,193],"main":[25],"challenges":[26],"that":[27,133,149],"restricts":[28],"performance":[30,55,154,170],"CTDE":[33],"<italic":[36,173],"xmlns:mml=\"http://www.w3.org/1998/Math/MathML\"":[37,174],"xmlns:xlink=\"http://www.w3.org/1999/xlink\">credit":[38],"assignment.</i>":[39],"Existing":[40],"methods":[41],"cannot":[42],"sufficiently":[43],"energize":[44],"each":[45],"agent":[46,72],"for":[47],"exploring":[48],"a":[49,64,76,117],"broader":[50],"solution":[51],"space":[52],"without":[53],"compromising":[54],"or":[56,197],"factorization":[57,78,84],"complexity.":[58],"In":[59],"this":[60],"article,":[61],"we":[62],"propose":[63],"self-incentive":[65],"credit":[66],"assignment":[67],"scheme":[68],"prioritize":[70],"individual":[71],"actions":[73],"based":[74],"on":[75,171,190],"novel":[77],"method":[79,135,151],"called":[80],"multihead":[81],"residual":[82,111],"value":[83,102,115,128],"(MRVF)":[85],"rather":[86],"than":[87],"being":[88],"constrained":[89],"by":[90],"quantity":[92],"collective":[94],"policies.":[95],"It":[96],"learns":[97],"an":[98],"extra":[99],"representation":[100],"gradients":[103],"from":[104],"cooperative":[106],"behaviors":[107],"factorizes":[109],"global":[112],"joint":[113],"action":[114],"as":[116,198,200],"monotonic":[118],"function,":[119],"which":[120],"can":[121],"effectively":[122],"improve":[123],"representability":[125],"function.":[129],"Theoretical":[130],"analysis":[131],"indicates":[132],"our":[134,150],"has":[136],"stronger":[137],"representational":[138],"ability":[139],"satisfies":[141],"individual-global-max":[143],"(IGM)":[144],"condition.":[145],"Extensive":[146],"experiments":[147],"validate":[148],"achieves":[152],"significant":[153],"improvement":[155],"terms":[157],"both":[159],"speed":[162],"stability;":[164],"particularly,":[165],"it":[166],"gains":[167],"best":[169],"two":[172],"xmlns:xlink=\"http://www.w3.org/1999/xlink\">super":[175],"hard</i>":[176],"maps":[177],"used":[181],"benchmark":[182],"StarCraft":[183],"challenge":[185],"(SMAC)":[186],"while":[187],"performances":[189],"other":[191],"scenarios":[192],"SMAC":[194],"are":[195],"better":[196],"well":[199],"state-of-the-art":[202],"baseline.":[203]},"counts_by_year":[{"year":2025,"cited_by_count":4}],"updated_date":"2026-03-27T05:58:40.876381","created_date":"2025-10-10T00:00:00"}
