{"id":"https://openalex.org/W2070963703","doi":"https://doi.org/10.1109/tcyb.2014.2332042","title":"Multiagent Reinforcement Learning With Unshared Value Functions","display_name":"Multiagent Reinforcement Learning With Unshared Value Functions","publication_year":2014,"publication_date":"2014-07-02","ids":{"openalex":"https://openalex.org/W2070963703","doi":"https://doi.org/10.1109/tcyb.2014.2332042","mag":"2070963703","pmid":"https://pubmed.ncbi.nlm.nih.gov/25014990"},"language":"en","primary_location":{"id":"doi:10.1109/tcyb.2014.2332042","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tcyb.2014.2332042","pdf_url":null,"source":{"id":"https://openalex.org/S4210191041","display_name":"IEEE Transactions on Cybernetics","issn_l":"2168-2267","issn":["2168-2267","2168-2275"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Cybernetics","raw_type":"journal-article"},"type":"article","indexed_in":["crossref","pubmed"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5104264364","display_name":"Yujing Hu","orcid":null},"institutions":[{"id":"https://openalex.org/I881766915","display_name":"Nanjing University","ror":"https://ror.org/01rxvg760","country_code":"CN","type":"education","lineage":["https://openalex.org/I881766915"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yujing Hu","raw_affiliation_strings":["State Key Laboratory for Novel Software Technology, Nanjing University, Nanjing, China","Department of Computer ScienceState Key Laboratory for Novel Software Technology, Nanjing University, Nanjing, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"State Key Laboratory for Novel Software Technology, Nanjing University, Nanjing, China","institution_ids":["https://openalex.org/I881766915"]},{"raw_affiliation_string":"Department of Computer ScienceState Key Laboratory for Novel Software Technology, Nanjing University, Nanjing, China","institution_ids":["https://openalex.org/I881766915"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5074250521","display_name":"Yang Gao","orcid":"https://orcid.org/0000-0002-2488-1813"},"institutions":[{"id":"https://openalex.org/I881766915","display_name":"Nanjing University","ror":"https://ror.org/01rxvg760","country_code":"CN","type":"education","lineage":["https://openalex.org/I881766915"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yang Gao","raw_affiliation_strings":["State Key Laboratory for Novel Software Technology, Nanjing University, Nanjing, China","Department of Computer ScienceState Key Laboratory for Novel Software Technology, Nanjing University, Nanjing, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"State Key Laboratory for Novel Software Technology, Nanjing University, Nanjing, China","institution_ids":["https://openalex.org/I881766915"]},{"raw_affiliation_string":"Department of Computer ScienceState Key Laboratory for Novel Software Technology, Nanjing University, Nanjing, China","institution_ids":["https://openalex.org/I881766915"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5017743551","display_name":"Bo An","orcid":"https://orcid.org/0000-0002-7064-7438"},"institutions":[{"id":"https://openalex.org/I172675005","display_name":"Nanyang Technological University","ror":"https://ror.org/02e7b5302","country_code":"SG","type":"education","lineage":["https://openalex.org/I172675005"]}],"countries":["SG"],"is_corresponding":false,"raw_author_name":"Bo An","raw_affiliation_strings":["School of Computer Engineering, Nanyang Technological University, Singapore","School of Computer Engineering, Nanyang Technological University,,Singapore"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"School of Computer Engineering, Nanyang Technological University, Singapore","institution_ids":["https://openalex.org/I172675005"]},{"raw_affiliation_string":"School of Computer Engineering, Nanyang Technological University,,Singapore","institution_ids":["https://openalex.org/I172675005"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":2,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":8.2246,"has_fulltext":false,"cited_by_count":68,"citation_normalized_percentile":{"value":0.97532919,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":89,"max":99},"biblio":{"volume":"45","issue":"4","first_page":"647","last_page":"662"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11031","display_name":"Game Theory and Applications","score":0.9991000294685364,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},"topics":[{"id":"https://openalex.org/T11031","display_name":"Game Theory and Applications","score":0.9991000294685364,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9975000023841858,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11252","display_name":"Evolutionary Game Theory and Cooperation","score":0.9900000095367432,"subfield":{"id":"https://openalex.org/subfields/3312","display_name":"Sociology and Political Science"},"field":{"id":"https://openalex.org/fields/33","display_name":"Social Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.8499796986579895},{"id":"https://openalex.org/keywords/nash-equilibrium","display_name":"Nash equilibrium","score":0.7676949501037598},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6379423141479492},{"id":"https://openalex.org/keywords/negotiation","display_name":"Negotiation","score":0.5258869528770447},{"id":"https://openalex.org/keywords/correlated-equilibrium","display_name":"Correlated equilibrium","score":0.4454386234283447},{"id":"https://openalex.org/keywords/solution-concept","display_name":"Solution concept","score":0.4413367509841919},{"id":"https://openalex.org/keywords/q-learning","display_name":"Q-learning","score":0.43588581681251526},{"id":"https://openalex.org/keywords/value","display_name":"Value (mathematics)","score":0.41791629791259766},{"id":"https://openalex.org/keywords/mathematical-economics","display_name":"Mathematical economics","score":0.4158087372779846},{"id":"https://openalex.org/keywords/mathematical-optimization","display_name":"Mathematical optimization","score":0.41572698950767517},{"id":"https://openalex.org/keywords/game-theory","display_name":"Game theory","score":0.37938129901885986},{"id":"https://openalex.org/keywords/equilibrium-selection","display_name":"Equilibrium selection","score":0.3213844895362854},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.2548210024833679},{"id":"https://openalex.org/keywords/repeated-game","display_name":"Repeated game","score":0.2106294333934784},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.2104024887084961},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.18718475103378296}],"concepts":[{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.8499796986579895},{"id":"https://openalex.org/C46814582","wikidata":"https://www.wikidata.org/wiki/Q23389","display_name":"Nash equilibrium","level":2,"score":0.7676949501037598},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6379423141479492},{"id":"https://openalex.org/C199776023","wikidata":"https://www.wikidata.org/wiki/Q202875","display_name":"Negotiation","level":2,"score":0.5258869528770447},{"id":"https://openalex.org/C163630976","wikidata":"https://www.wikidata.org/wiki/Q964667","display_name":"Correlated equilibrium","level":5,"score":0.4454386234283447},{"id":"https://openalex.org/C11343654","wikidata":"https://www.wikidata.org/wiki/Q780008","display_name":"Solution concept","level":3,"score":0.4413367509841919},{"id":"https://openalex.org/C188116033","wikidata":"https://www.wikidata.org/wiki/Q2664563","display_name":"Q-learning","level":3,"score":0.43588581681251526},{"id":"https://openalex.org/C2776291640","wikidata":"https://www.wikidata.org/wiki/Q2912517","display_name":"Value (mathematics)","level":2,"score":0.41791629791259766},{"id":"https://openalex.org/C144237770","wikidata":"https://www.wikidata.org/wiki/Q747534","display_name":"Mathematical economics","level":1,"score":0.4158087372779846},{"id":"https://openalex.org/C126255220","wikidata":"https://www.wikidata.org/wiki/Q141495","display_name":"Mathematical optimization","level":1,"score":0.41572698950767517},{"id":"https://openalex.org/C177142836","wikidata":"https://www.wikidata.org/wiki/Q44455","display_name":"Game theory","level":2,"score":0.37938129901885986},{"id":"https://openalex.org/C164407509","wikidata":"https://www.wikidata.org/wiki/Q5384490","display_name":"Equilibrium selection","level":4,"score":0.3213844895362854},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.2548210024833679},{"id":"https://openalex.org/C202556891","wikidata":"https://www.wikidata.org/wiki/Q1584646","display_name":"Repeated game","level":3,"score":0.2106294333934784},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.2104024887084961},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.18718475103378296},{"id":"https://openalex.org/C199539241","wikidata":"https://www.wikidata.org/wiki/Q7748","display_name":"Law","level":1,"score":0.0},{"id":"https://openalex.org/C17744445","wikidata":"https://www.wikidata.org/wiki/Q36442","display_name":"Political science","level":0,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1109/tcyb.2014.2332042","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tcyb.2014.2332042","pdf_url":null,"source":{"id":"https://openalex.org/S4210191041","display_name":"IEEE Transactions on Cybernetics","issn_l":"2168-2267","issn":["2168-2267","2168-2275"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Cybernetics","raw_type":"journal-article"},{"id":"pmid:25014990","is_oa":false,"landing_page_url":"https://pubmed.ncbi.nlm.nih.gov/25014990","pdf_url":null,"source":{"id":"https://openalex.org/S4306525036","display_name":"PubMed","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1299303238","host_organization_name":"National Institutes of Health","host_organization_lineage":["https://openalex.org/I1299303238"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE transactions on cybernetics","raw_type":null}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G2042088034","display_name":"\u57fa\u4e8e\u4e91\u8ba1\u7b97\u7684\u6d77\u91cf\u6570\u636e\u6316\u6398","funder_award_id":"61035003","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G2296064708","display_name":null,"funder_award_id":"NCET-10-0476","funder_id":"https://openalex.org/F4320334924","funder_display_name":"Program for New Century Excellent Talents in University"},{"id":"https://openalex.org/G5030852580","display_name":null,"funder_award_id":"61202212","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G5616286496","display_name":null,"funder_award_id":"61175042","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G6667581590","display_name":null,"funder_award_id":"61321491","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"},{"id":"https://openalex.org/F4320334924","display_name":"Program for New Century Excellent Talents in University","ror":"https://ror.org/01mv9t934"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":55,"referenced_works":["https://openalex.org/W30469914","https://openalex.org/W38166518","https://openalex.org/W67301991","https://openalex.org/W1481781224","https://openalex.org/W1502765764","https://openalex.org/W1513468570","https://openalex.org/W1519783625","https://openalex.org/W1542268266","https://openalex.org/W1542941925","https://openalex.org/W1560074431","https://openalex.org/W1868540347","https://openalex.org/W1974740629","https://openalex.org/W1976473913","https://openalex.org/W1978375026","https://openalex.org/W1991799203","https://openalex.org/W1996762597","https://openalex.org/W2010526786","https://openalex.org/W2027800098","https://openalex.org/W2037810568","https://openalex.org/W2064264248","https://openalex.org/W2072256588","https://openalex.org/W2073875457","https://openalex.org/W2096690894","https://openalex.org/W2097780422","https://openalex.org/W2099618002","https://openalex.org/W2103437045","https://openalex.org/W2104602264","https://openalex.org/W2112794046","https://openalex.org/W2120327309","https://openalex.org/W2120846115","https://openalex.org/W2121863487","https://openalex.org/W2124152208","https://openalex.org/W2130463867","https://openalex.org/W2131376880","https://openalex.org/W2145067550","https://openalex.org/W2153427071","https://openalex.org/W2164637474","https://openalex.org/W2320900399","https://openalex.org/W2406194061","https://openalex.org/W3011120880","https://openalex.org/W4214717370","https://openalex.org/W6601265479","https://openalex.org/W6601540754","https://openalex.org/W6602735774","https://openalex.org/W6629047614","https://openalex.org/W6630520071","https://openalex.org/W6631168379","https://openalex.org/W6633472260","https://openalex.org/W6674513192","https://openalex.org/W6675811377","https://openalex.org/W6678168664","https://openalex.org/W6681342480","https://openalex.org/W6684470913","https://openalex.org/W6713690997","https://openalex.org/W6775686901"],"related_works":["https://openalex.org/W2097226409","https://openalex.org/W2013767790","https://openalex.org/W2025767136","https://openalex.org/W2408729172","https://openalex.org/W2952611531","https://openalex.org/W615573743","https://openalex.org/W4299551322","https://openalex.org/W4301347782","https://openalex.org/W1667857601","https://openalex.org/W2200216774"],"abstract_inverted_index":{"One":[0],"important":[1],"approach":[2],"of":[3,15,28,95,114],"multiagent":[4],"reinforcement":[5,16],"learning":[6,17],"(MARL)":[7],"is":[8,12,49,105],"equilibrium-based":[9],"MARL,":[10],"which":[11,145,211],"a":[13,101,173,196],"combination":[14],"and":[18,32,73,131,226,236,265],"game":[19],"theory.":[20],"Most":[21],"existing":[22,231],"algorithms":[23,76,233,260],"involve":[24],"computationally":[25,107],"expensive":[26],"calculation":[27],"mixed":[29,96,102],"strategy":[30,90,97,103,116,125,129,134,142,156,164,180],"equilibria":[31,98,181],"require":[33],"agents":[34,52,81,146],"to":[35,57,62,70,82,215],"replicate":[36],"the":[37,78],"other":[38],"agents'":[39],"value":[40,84,183],"functions":[41,184],"for":[42,80,177],"equilibrium":[43,91,104,121,224],"computing":[44],"in":[45,208,247],"each":[46],"state.":[47],"This":[48,67],"unrealistic":[50],"since":[51,182],"may":[53],"not":[54,186],"be":[55],"willing":[56],"share":[58,83],"such":[59,251],"information":[60],"due":[61],"privacy":[63],"or":[64,153],"safety":[65],"concerns.":[66],"paper":[68],"aims":[69],"develop":[71],"novel":[72,197],"efficient":[74],"MARL":[75,198,217,232,259],"without":[77],"need":[79],"functions.":[85],"First,":[86],"we":[87,171,194,240],"adopt":[88],"pure":[89,115,124,155,179],"solution":[92,122,139],"concepts":[93,140],"instead":[94],"given":[99],"that":[100,162,242],"often":[106],"expensive.":[108],"In":[109,219],"this":[110],"paper,":[111],"three":[112],"types":[113],"profiles":[117,143,165],"are":[118,141,166,185,205,212],"utilized":[119],"as":[120,252,255,262],"concepts:":[123],"Nash":[126,157,237],"equilibrium,":[127],"equilibrium-dominating":[128,133],"profile,":[130],"nonstrict":[132],"profile.":[135],"The":[136],"latter":[137],"two":[138],"from":[144],"can":[147],"gain":[148],"higher":[149],"payoffs":[150],"than":[151,230],"one":[152],"more":[154],"equilibria.":[158,169],"Theoretical":[159],"analysis":[160],"shows":[161],"these":[163,192,220],"symmetric":[167],"meta":[168],"Second,":[170],"propose":[172,195],"multistep":[174],"negotiation":[175],"process":[176],"finding":[178],"shared":[187],"among":[188],"agents.":[189],"By":[190],"putting":[191],"together,":[193],"algorithm":[199],"called":[200],"negotiation-based":[201],"Q-learning":[202,235,264],"(NegoQ).":[203],"Experiments":[204],"first":[206],"conducted":[207],"grid-world":[209],"games,":[210,221,254],"widely":[213],"used":[214],"evaluate":[216],"algorithms.":[218],"NegoQ":[222,243],"learns":[223],"policies":[225],"runs":[227],"significantly":[228],"faster":[229],"(correlated":[234],"Q-learning).":[238,267],"Surprisingly,":[239],"find":[241],"also":[244],"performs":[245],"well":[246],"team":[248],"Markov":[249],"games":[250],"pursuit":[253],"compared":[256],"with":[257],"team-task-oriented":[258],"(such":[261],"friend":[263],"distributed":[266]},"counts_by_year":[{"year":2025,"cited_by_count":2},{"year":2024,"cited_by_count":4},{"year":2023,"cited_by_count":7},{"year":2022,"cited_by_count":1},{"year":2021,"cited_by_count":8},{"year":2020,"cited_by_count":10},{"year":2019,"cited_by_count":13},{"year":2018,"cited_by_count":3},{"year":2017,"cited_by_count":5},{"year":2016,"cited_by_count":9},{"year":2015,"cited_by_count":5},{"year":2014,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}