{"id":"https://openalex.org/W3172715706","doi":"https://doi.org/10.1109/tac.2021.3085171","title":"Policy Evaluation and Seeking for Multiagent Reinforcement Learning via Best Response","display_name":"Policy Evaluation and Seeking for Multiagent Reinforcement Learning via Best Response","publication_year":2021,"publication_date":"2021-05-31","ids":{"openalex":"https://openalex.org/W3172715706","doi":"https://doi.org/10.1109/tac.2021.3085171","mag":"3172715706"},"language":"en","primary_location":{"id":"doi:10.1109/tac.2021.3085171","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tac.2021.3085171","pdf_url":null,"source":{"id":"https://openalex.org/S184954342","display_name":"IEEE Transactions on Automatic Control","issn_l":"0018-9286","issn":["0018-9286","1558-2523","2334-3303"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Automatic Control","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5058113154","display_name":"Rui Yan","orcid":"https://orcid.org/0000-0002-8685-5055"},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Rui Yan","raw_affiliation_strings":["Department of Automation, Tsinghua University, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Department of Automation, Tsinghua University, Beijing, China","institution_ids":["https://openalex.org/I99065089"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5035843542","display_name":"Xiaoming Duan","orcid":"https://orcid.org/0000-0002-2655-3987"},"institutions":[{"id":"https://openalex.org/I4210140143","display_name":"Dynamic Systems (United States)","ror":"https://ror.org/043vm9914","country_code":"US","type":"company","lineage":["https://openalex.org/I4210140143"]},{"id":"https://openalex.org/I154570441","display_name":"University of California, Santa Barbara","ror":"https://ror.org/02t274463","country_code":"US","type":"education","lineage":["https://openalex.org/I154570441"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Xiaoming Duan","raw_affiliation_strings":["Mechanical Engineering Department and the Center of Control, Dynamical Systems and Computation, University of California Santa Barbara, CA, USA"],"affiliations":[{"raw_affiliation_string":"Mechanical Engineering Department and the Center of Control, Dynamical Systems and Computation, University of California Santa Barbara, CA, USA","institution_ids":["https://openalex.org/I4210140143","https://openalex.org/I154570441"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5043045459","display_name":"Zongying Shi","orcid":"https://orcid.org/0000-0002-6805-6858"},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Zongying Shi","raw_affiliation_strings":["Department of Automation, Tsinghua University, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Department of Automation, Tsinghua University, Beijing, China","institution_ids":["https://openalex.org/I99065089"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101631780","display_name":"Yisheng Zhong","orcid":"https://orcid.org/0000-0001-7871-7317"},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yisheng Zhong","raw_affiliation_strings":["Department of Automation, Tsinghua University, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Department of Automation, Tsinghua University, Beijing, China","institution_ids":["https://openalex.org/I99065089"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5003146330","display_name":"Jason R. Marden","orcid":"https://orcid.org/0000-0003-3260-8574"},"institutions":[{"id":"https://openalex.org/I154570441","display_name":"University of California, Santa Barbara","ror":"https://ror.org/02t274463","country_code":"US","type":"education","lineage":["https://openalex.org/I154570441"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Jason R. Marden","raw_affiliation_strings":["Department of Electrical and Computer Engineering, University of California Santa Barbara, CA, USA"],"affiliations":[{"raw_affiliation_string":"Department of Electrical and Computer Engineering, University of California Santa Barbara, CA, USA","institution_ids":["https://openalex.org/I154570441"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5068353454","display_name":"Francesco Bullo","orcid":"https://orcid.org/0000-0002-4785-2118"},"institutions":[{"id":"https://openalex.org/I154570441","display_name":"University of California, Santa Barbara","ror":"https://ror.org/02t274463","country_code":"US","type":"education","lineage":["https://openalex.org/I154570441"]},{"id":"https://openalex.org/I4210140143","display_name":"Dynamic Systems (United States)","ror":"https://ror.org/043vm9914","country_code":"US","type":"company","lineage":["https://openalex.org/I4210140143"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Francesco Bullo","raw_affiliation_strings":["Mechanical Engineering Department and the Center of Control, Dynamical Systems and Computation, University of California Santa Barbara, CA, USA"],"affiliations":[{"raw_affiliation_string":"Mechanical Engineering Department and the Center of Control, Dynamical Systems and Computation, University of California Santa Barbara, CA, USA","institution_ids":["https://openalex.org/I4210140143","https://openalex.org/I154570441"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5058113154"],"corresponding_institution_ids":["https://openalex.org/I99065089"],"apc_list":null,"apc_paid":null,"fwci":0.9518,"has_fulltext":false,"cited_by_count":10,"citation_normalized_percentile":{"value":0.79881837,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":96,"max":98},"biblio":{"volume":"67","issue":"4","first_page":"1898","last_page":"1913"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9970999956130981,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9970999956130981,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11031","display_name":"Game Theory and Applications","score":0.9969000220298767,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T10646","display_name":"Experimental Behavioral Economics Studies","score":0.9922999739646912,"subfield":{"id":"https://openalex.org/subfields/3311","display_name":"Safety Research"},"field":{"id":"https://openalex.org/fields/33","display_name":"Social Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.7808662056922913},{"id":"https://openalex.org/keywords/scalability","display_name":"Scalability","score":0.6199252605438232},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.5531561970710754},{"id":"https://openalex.org/keywords/class","display_name":"Class (philosophy)","score":0.5192532539367676},{"id":"https://openalex.org/keywords/nash-equilibrium","display_name":"Nash equilibrium","score":0.5002562999725342},{"id":"https://openalex.org/keywords/best-response","display_name":"Best response","score":0.4653832018375397},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.4553239643573761},{"id":"https://openalex.org/keywords/rank","display_name":"Rank (graph theory)","score":0.44767582416534424},{"id":"https://openalex.org/keywords/metric","display_name":"Metric (unit)","score":0.4443408250808716},{"id":"https://openalex.org/keywords/mathematical-optimization","display_name":"Mathematical optimization","score":0.37978264689445496},{"id":"https://openalex.org/keywords/theoretical-computer-science","display_name":"Theoretical computer science","score":0.3587002754211426},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.3357952833175659},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.32576999068260193},{"id":"https://openalex.org/keywords/combinatorics","display_name":"Combinatorics","score":0.09224745631217957}],"concepts":[{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.7808662056922913},{"id":"https://openalex.org/C48044578","wikidata":"https://www.wikidata.org/wiki/Q727490","display_name":"Scalability","level":2,"score":0.6199252605438232},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.5531561970710754},{"id":"https://openalex.org/C2777212361","wikidata":"https://www.wikidata.org/wiki/Q5127848","display_name":"Class (philosophy)","level":2,"score":0.5192532539367676},{"id":"https://openalex.org/C46814582","wikidata":"https://www.wikidata.org/wiki/Q23389","display_name":"Nash equilibrium","level":2,"score":0.5002562999725342},{"id":"https://openalex.org/C32407928","wikidata":"https://www.wikidata.org/wiki/Q2733833","display_name":"Best response","level":3,"score":0.4653832018375397},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4553239643573761},{"id":"https://openalex.org/C164226766","wikidata":"https://www.wikidata.org/wiki/Q7293202","display_name":"Rank (graph theory)","level":2,"score":0.44767582416534424},{"id":"https://openalex.org/C176217482","wikidata":"https://www.wikidata.org/wiki/Q860554","display_name":"Metric (unit)","level":2,"score":0.4443408250808716},{"id":"https://openalex.org/C126255220","wikidata":"https://www.wikidata.org/wiki/Q141495","display_name":"Mathematical optimization","level":1,"score":0.37978264689445496},{"id":"https://openalex.org/C80444323","wikidata":"https://www.wikidata.org/wiki/Q2878974","display_name":"Theoretical computer science","level":1,"score":0.3587002754211426},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.3357952833175659},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.32576999068260193},{"id":"https://openalex.org/C114614502","wikidata":"https://www.wikidata.org/wiki/Q76592","display_name":"Combinatorics","level":1,"score":0.09224745631217957},{"id":"https://openalex.org/C21547014","wikidata":"https://www.wikidata.org/wiki/Q1423657","display_name":"Operations management","level":1,"score":0.0},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0},{"id":"https://openalex.org/C77088390","wikidata":"https://www.wikidata.org/wiki/Q8513","display_name":"Database","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/tac.2021.3085171","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tac.2021.3085171","pdf_url":null,"source":{"id":"https://openalex.org/S184954342","display_name":"IEEE Transactions on Automatic Control","issn_l":"0018-9286","issn":["0018-9286","1558-2523","2334-3303"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Automatic Control","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[{"display_name":"Partnerships for the goals","id":"https://metadata.un.org/sdg/17","score":0.5699999928474426}],"awards":[{"id":"https://openalex.org/G3335825630","display_name":null,"funder_award_id":"61374034","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G589342140","display_name":null,"funder_award_id":"FA9550-15-1-0138","funder_id":"https://openalex.org/F4320338279","funder_display_name":"Air Force Office of Scientific Research"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"},{"id":"https://openalex.org/F4320322725","display_name":"China Scholarship Council","ror":"https://ror.org/04atp4p48"},{"id":"https://openalex.org/F4320338279","display_name":"Air Force Office of Scientific Research","ror":"https://ror.org/011e9bt93"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":51,"referenced_works":["https://openalex.org/W1500428599","https://openalex.org/W1542941925","https://openalex.org/W1557517019","https://openalex.org/W1567092208","https://openalex.org/W1597864774","https://openalex.org/W1641379095","https://openalex.org/W2010612033","https://openalex.org/W2010654234","https://openalex.org/W2034184818","https://openalex.org/W2036807120","https://openalex.org/W2042040994","https://openalex.org/W2043509535","https://openalex.org/W2053957335","https://openalex.org/W2067050450","https://openalex.org/W2083342911","https://openalex.org/W2118994458","https://openalex.org/W2142612324","https://openalex.org/W2168509539","https://openalex.org/W2176451521","https://openalex.org/W2257979135","https://openalex.org/W2755613326","https://openalex.org/W2785598896","https://openalex.org/W2791232707","https://openalex.org/W2793538878","https://openalex.org/W2902907165","https://openalex.org/W2925418831","https://openalex.org/W2962990479","https://openalex.org/W2963020939","https://openalex.org/W2963275513","https://openalex.org/W2967727187","https://openalex.org/W2972260985","https://openalex.org/W2982316857","https://openalex.org/W3012335515","https://openalex.org/W3021603984","https://openalex.org/W3099277320","https://openalex.org/W3148958427","https://openalex.org/W4211095127","https://openalex.org/W4214717370","https://openalex.org/W4239654070","https://openalex.org/W4298857966","https://openalex.org/W6633910650","https://openalex.org/W6637967152","https://openalex.org/W6659308809","https://openalex.org/W6676957253","https://openalex.org/W6682264641","https://openalex.org/W6712181171","https://openalex.org/W6735011893","https://openalex.org/W6743367460","https://openalex.org/W6748672522","https://openalex.org/W6767166739","https://openalex.org/W6768536907"],"related_works":["https://openalex.org/W2236801283","https://openalex.org/W2728657731","https://openalex.org/W2481143976","https://openalex.org/W1853631319","https://openalex.org/W2092374696","https://openalex.org/W2803932348","https://openalex.org/W3207342620","https://openalex.org/W2803931294","https://openalex.org/W2013767790","https://openalex.org/W3030822758"],"abstract_inverted_index":{"Multiagent":[0],"policy":[1,29],"evaluation":[2],"and":[3,23,49,86,89,119,213,226],"seeking":[4],"are":[5,148],"long-standing":[6],"challenges":[7],"in":[8,26,53],"developing":[9],"theories":[10],"for":[11,45,70,153,171,222,234],"multiagent":[12,54],"reinforcement":[13,95],"learning":[14,19,96,224],"(MARL),":[15],"due":[16,239],"to":[17,63,231,240],"multidimensional":[18],"goals,":[20],"nonstationary":[21],"environment,":[22],"scalability":[24,211],"issues":[25],"the":[27,46,114,117,140,168,172,188,191,199,210,218,223,241],"joint":[28],"space.":[30],"This":[31],"article":[32],"introduces":[33],"two":[34],"metrics":[35,189],"grounded":[36],"on":[37,83,105],"a":[38,68,125,134,154,181],"game-theoretic":[39,229],"solution":[40],"concept":[41],"called":[42],"sink":[43],"equilibrium,":[44],"evaluation,":[47],"ranking,":[48],"computation":[50],"of":[51,136,157,183,190,220],"policies":[52,144,192],"learning.":[55],"We":[56,109,163],"adopt":[57],"strict":[58],"best":[59],"response":[60],"dynamics":[61],"(SBRDs)":[62],"model":[64],"selfish":[65],"behaviors":[66,79],"at":[67],"meta-level":[69],"MARL.":[71],"Our":[72],"approach":[73],"can":[74],"deal":[75],"with":[76,93,139,145,150,160,194],"dynamical":[77],"cyclical":[78],"(unlike":[80],"approaches":[81],"based":[82],"Nash":[84],"equilibria":[85],"Elo":[87],"ratings),":[88],"is":[90,174],"more":[91],"compatible":[92],"single-agent":[94],"than":[97],"<inline-formula":[98],"xmlns:mml=\"http://www.w3.org/1998/Math/MathML\"":[99],"xmlns:xlink=\"http://www.w3.org/1999/xlink\"><tex-math":[100],"notation=\"LaTeX\">$\\alpha$</tex-math></inline-formula>":[101],"-rank,":[102],"which":[103],"relies":[104],"weakly":[106],"better":[107],"responses.":[108],"first":[110],"consider":[111,165],"settings":[112,166],"where":[113,167],"difference":[115,173],"between":[116],"largest":[118,121],"second":[120],"equilibrium":[122],"metric":[123,147],"has":[124],"known":[126],"lower":[127,169],"bound.":[128],"With":[129],"this":[130,177],"knowledge,":[131],"we":[132,179],"propose":[133,180],"class":[135,156,182],"perturbed":[137,184,207],"SBRD":[138,185,208],"following":[141],"property:":[142],"only":[143],"maximum":[146],"observed":[149,193],"nonzero":[151,195],"probability":[152,196],"broad":[155],"stochastic":[158],"games":[159],"finite":[161],"memory.":[162],"then":[164],"bound":[170],"unknown.":[175],"For":[176],"setting,":[178],"such":[186],"that":[187],"differ":[197],"from":[198],"optimal":[200],"by":[201,216],"any":[202],"given":[203],"tolerance.":[204],"The":[205],"proposed":[206],"addresses":[209],"issue":[212],"opponent-induced":[214],"nonstationarity":[215],"fixing":[217],"strategies":[219],"others":[221],"agent,":[225],"uses":[227],"empirical":[228],"analysis":[230],"estimate":[232],"payoffs":[233],"each":[235],"strategy":[236],"profile":[237],"obtained":[238],"perturbation.":[242]},"counts_by_year":[{"year":2025,"cited_by_count":3},{"year":2024,"cited_by_count":4},{"year":2023,"cited_by_count":3}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}