{"id":"https://openalex.org/W2922444559","doi":"https://doi.org/10.24963/ijcai.2019/66","title":"Computing Approximate Equilibria in Sequential Adversarial Games by Exploitability Descent","display_name":"Computing Approximate Equilibria in Sequential Adversarial Games by Exploitability Descent","publication_year":2019,"publication_date":"2019-07-28","ids":{"openalex":"https://openalex.org/W2922444559","doi":"https://doi.org/10.24963/ijcai.2019/66","mag":"2922444559"},"language":"en","primary_location":{"id":"doi:10.24963/ijcai.2019/66","is_oa":true,"landing_page_url":"https://doi.org/10.24963/ijcai.2019/66","pdf_url":"https://www.ijcai.org/proceedings/2019/0066.pdf","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the Twenty-Eighth International Joint Conference on Artificial Intelligence","raw_type":"proceedings-article"},"type":"preprint","indexed_in":["arxiv","crossref","datacite"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://www.ijcai.org/proceedings/2019/0066.pdf","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5081026564","display_name":"Edward Lockhart","orcid":"https://orcid.org/0000-0001-8753-0765"},"institutions":[{"id":"https://openalex.org/I4210090411","display_name":"DeepMind (United Kingdom)","ror":"https://ror.org/00971b260","country_code":"GB","type":"company","lineage":["https://openalex.org/I4210090411","https://openalex.org/I4210128969"]}],"countries":["GB"],"is_corresponding":true,"raw_author_name":"Edward Lockhart","raw_affiliation_strings":["DeepMind"],"affiliations":[{"raw_affiliation_string":"DeepMind","institution_ids":["https://openalex.org/I4210090411"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5049659586","display_name":"Marc Lanctot","orcid":null},"institutions":[{"id":"https://openalex.org/I4210090411","display_name":"DeepMind (United Kingdom)","ror":"https://ror.org/00971b260","country_code":"GB","type":"company","lineage":["https://openalex.org/I4210090411","https://openalex.org/I4210128969"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Marc Lanctot","raw_affiliation_strings":["DeepMind"],"affiliations":[{"raw_affiliation_string":"DeepMind","institution_ids":["https://openalex.org/I4210090411"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5056707583","display_name":"Julien P\u00e9rolat","orcid":"https://orcid.org/0000-0002-8176-1666"},"institutions":[{"id":"https://openalex.org/I4210090411","display_name":"DeepMind (United Kingdom)","ror":"https://ror.org/00971b260","country_code":"GB","type":"company","lineage":["https://openalex.org/I4210090411","https://openalex.org/I4210128969"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Julien P\u00e9rolat","raw_affiliation_strings":["DeepMind"],"affiliations":[{"raw_affiliation_string":"DeepMind","institution_ids":["https://openalex.org/I4210090411"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5058299981","display_name":"Jean-Baptiste Lespiau","orcid":null},"institutions":[{"id":"https://openalex.org/I4210090411","display_name":"DeepMind (United Kingdom)","ror":"https://ror.org/00971b260","country_code":"GB","type":"company","lineage":["https://openalex.org/I4210090411","https://openalex.org/I4210128969"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Jean-Baptiste Lespiau","raw_affiliation_strings":["DeepMind"],"affiliations":[{"raw_affiliation_string":"DeepMind","institution_ids":["https://openalex.org/I4210090411"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5046207547","display_name":"Dustin Morrill","orcid":"https://orcid.org/0000-0002-6454-1848"},"institutions":[{"id":"https://openalex.org/I4210090411","display_name":"DeepMind (United Kingdom)","ror":"https://ror.org/00971b260","country_code":"GB","type":"company","lineage":["https://openalex.org/I4210090411","https://openalex.org/I4210128969"]},{"id":"https://openalex.org/I154425047","display_name":"University of Alberta","ror":"https://ror.org/0160cpw27","country_code":"CA","type":"education","lineage":["https://openalex.org/I154425047"]}],"countries":["CA","GB"],"is_corresponding":false,"raw_author_name":"Dustin Morrill","raw_affiliation_strings":["DeepMind","University of Alberta Edmonton Canada"],"affiliations":[{"raw_affiliation_string":"DeepMind","institution_ids":["https://openalex.org/I4210090411"]},{"raw_affiliation_string":"University of Alberta Edmonton Canada","institution_ids":["https://openalex.org/I154425047"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5090501025","display_name":"Finbarr Timbers","orcid":"https://orcid.org/0000-0001-9047-9542"},"institutions":[{"id":"https://openalex.org/I4210090411","display_name":"DeepMind (United Kingdom)","ror":"https://ror.org/00971b260","country_code":"GB","type":"company","lineage":["https://openalex.org/I4210090411","https://openalex.org/I4210128969"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Finbarr TImbers","raw_affiliation_strings":["DeepMind"],"affiliations":[{"raw_affiliation_string":"DeepMind","institution_ids":["https://openalex.org/I4210090411"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5008547992","display_name":"Karl Tuyls","orcid":"https://orcid.org/0000-0001-7929-1944"},"institutions":[{"id":"https://openalex.org/I4210090411","display_name":"DeepMind (United Kingdom)","ror":"https://ror.org/00971b260","country_code":"GB","type":"company","lineage":["https://openalex.org/I4210090411","https://openalex.org/I4210128969"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Karl Tuyls","raw_affiliation_strings":["DeepMind"],"affiliations":[{"raw_affiliation_string":"DeepMind","institution_ids":["https://openalex.org/I4210090411"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":7,"corresponding_author_ids":["https://openalex.org/A5081026564"],"corresponding_institution_ids":["https://openalex.org/I4210090411"],"apc_list":null,"apc_paid":null,"fwci":3.35355193,"has_fulltext":false,"cited_by_count":18,"citation_normalized_percentile":{"value":0.91763775,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":89,"max":98},"biblio":{"volume":null,"issue":null,"first_page":"464","last_page":"470"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12101","display_name":"Advanced Bandit Algorithms Research","score":0.9994999766349792,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},"topics":[{"id":"https://openalex.org/T12101","display_name":"Advanced Bandit Algorithms Research","score":0.9994999766349792,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9990000128746033,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11182","display_name":"Auction Theory and Applications","score":0.9934999942779541,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/fictitious-play","display_name":"Fictitious play","score":0.8248352408409119},{"id":"https://openalex.org/keywords/convergence","display_name":"Convergence (economics)","score":0.6971151828765869},{"id":"https://openalex.org/keywords/nash-equilibrium","display_name":"Nash equilibrium","score":0.6835618019104004},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6415741443634033},{"id":"https://openalex.org/keywords/counterfactual-thinking","display_name":"Counterfactual thinking","score":0.6310446858406067},{"id":"https://openalex.org/keywords/benchmark","display_name":"Benchmark (surveying)","score":0.6216573119163513},{"id":"https://openalex.org/keywords/mathematical-optimization","display_name":"Mathematical optimization","score":0.583360493183136},{"id":"https://openalex.org/keywords/perfect-information","display_name":"Perfect information","score":0.5799729228019714},{"id":"https://openalex.org/keywords/regret","display_name":"Regret","score":0.5775967836380005},{"id":"https://openalex.org/keywords/descent","display_name":"Descent (aeronautics)","score":0.5487990975379944},{"id":"https://openalex.org/keywords/class","display_name":"Class (philosophy)","score":0.5260741114616394},{"id":"https://openalex.org/keywords/zero","display_name":"Zero (linguistics)","score":0.4840068519115448},{"id":"https://openalex.org/keywords/imperfect","display_name":"Imperfect","score":0.46846503019332886},{"id":"https://openalex.org/keywords/mathematical-economics","display_name":"Mathematical economics","score":0.4288651645183563},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.2444019317626953},{"id":"https://openalex.org/keywords/economics","display_name":"Economics","score":0.14930400252342224},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.1395666003227234}],"concepts":[{"id":"https://openalex.org/C145071142","wikidata":"https://www.wikidata.org/wiki/Q1411116","display_name":"Fictitious play","level":3,"score":0.8248352408409119},{"id":"https://openalex.org/C2777303404","wikidata":"https://www.wikidata.org/wiki/Q759757","display_name":"Convergence (economics)","level":2,"score":0.6971151828765869},{"id":"https://openalex.org/C46814582","wikidata":"https://www.wikidata.org/wiki/Q23389","display_name":"Nash equilibrium","level":2,"score":0.6835618019104004},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6415741443634033},{"id":"https://openalex.org/C108650721","wikidata":"https://www.wikidata.org/wiki/Q1783253","display_name":"Counterfactual thinking","level":2,"score":0.6310446858406067},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.6216573119163513},{"id":"https://openalex.org/C126255220","wikidata":"https://www.wikidata.org/wiki/Q141495","display_name":"Mathematical optimization","level":1,"score":0.583360493183136},{"id":"https://openalex.org/C123676819","wikidata":"https://www.wikidata.org/wiki/Q1074338","display_name":"Perfect information","level":2,"score":0.5799729228019714},{"id":"https://openalex.org/C50817715","wikidata":"https://www.wikidata.org/wiki/Q79895177","display_name":"Regret","level":2,"score":0.5775967836380005},{"id":"https://openalex.org/C2776637919","wikidata":"https://www.wikidata.org/wiki/Q624380","display_name":"Descent (aeronautics)","level":2,"score":0.5487990975379944},{"id":"https://openalex.org/C2777212361","wikidata":"https://www.wikidata.org/wiki/Q5127848","display_name":"Class (philosophy)","level":2,"score":0.5260741114616394},{"id":"https://openalex.org/C2780813799","wikidata":"https://www.wikidata.org/wiki/Q3274237","display_name":"Zero (linguistics)","level":2,"score":0.4840068519115448},{"id":"https://openalex.org/C2780310539","wikidata":"https://www.wikidata.org/wiki/Q12547192","display_name":"Imperfect","level":2,"score":0.46846503019332886},{"id":"https://openalex.org/C144237770","wikidata":"https://www.wikidata.org/wiki/Q747534","display_name":"Mathematical economics","level":1,"score":0.4288651645183563},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.2444019317626953},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.14930400252342224},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.1395666003227234},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C205649164","wikidata":"https://www.wikidata.org/wiki/Q1071","display_name":"Geography","level":0,"score":0.0},{"id":"https://openalex.org/C111472728","wikidata":"https://www.wikidata.org/wiki/Q9471","display_name":"Epistemology","level":1,"score":0.0},{"id":"https://openalex.org/C146978453","wikidata":"https://www.wikidata.org/wiki/Q3798668","display_name":"Aerospace engineering","level":1,"score":0.0},{"id":"https://openalex.org/C50522688","wikidata":"https://www.wikidata.org/wiki/Q189833","display_name":"Economic growth","level":1,"score":0.0},{"id":"https://openalex.org/C13280743","wikidata":"https://www.wikidata.org/wiki/Q131089","display_name":"Geodesy","level":1,"score":0.0},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.0},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.0},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.0}],"mesh":[],"locations_count":4,"locations":[{"id":"doi:10.24963/ijcai.2019/66","is_oa":true,"landing_page_url":"https://doi.org/10.24963/ijcai.2019/66","pdf_url":"https://www.ijcai.org/proceedings/2019/0066.pdf","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the Twenty-Eighth International Joint Conference on Artificial Intelligence","raw_type":"proceedings-article"},{"id":"pmh:oai:arXiv.org:1903.05614","is_oa":true,"landing_page_url":"http://arxiv.org/abs/1903.05614","pdf_url":"https://arxiv.org/pdf/1903.05614","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":null},{"id":"mag:2922444559","is_oa":true,"landing_page_url":"https://arxiv.org/pdf/1903.05614.pdf","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"arXiv (Cornell University)","raw_type":null},{"id":"doi:10.48550/arxiv.1903.05614","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.1903.05614","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.24963/ijcai.2019/66","is_oa":true,"landing_page_url":"https://doi.org/10.24963/ijcai.2019/66","pdf_url":"https://www.ijcai.org/proceedings/2019/0066.pdf","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the Twenty-Eighth International Joint Conference on Artificial Intelligence","raw_type":"proceedings-article"},"sustainable_development_goals":[{"score":0.75,"id":"https://metadata.un.org/sdg/16","display_name":"Peace, Justice and strong institutions"}],"awards":[],"funders":[{"id":"https://openalex.org/F4320314212","display_name":"Alberta Machine Intelligence Institute","ror":null}],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W2922444559.pdf","grobid_xml":"https://content.openalex.org/works/W2922444559.grobid-xml"},"referenced_works_count":17,"referenced_works":["https://openalex.org/W190498140","https://openalex.org/W334144313","https://openalex.org/W1528676759","https://openalex.org/W1726776292","https://openalex.org/W1980717955","https://openalex.org/W2016384870","https://openalex.org/W2077723394","https://openalex.org/W2103315867","https://openalex.org/W2119717200","https://openalex.org/W2121863487","https://openalex.org/W2122634705","https://openalex.org/W2144274908","https://openalex.org/W2155027007","https://openalex.org/W2168356773","https://openalex.org/W2911296969","https://openalex.org/W2962713264","https://openalex.org/W3122766472"],"related_works":["https://openalex.org/W2964381205","https://openalex.org/W2103315867","https://openalex.org/W2982316857","https://openalex.org/W2773381986","https://openalex.org/W2291986326","https://openalex.org/W2144274908","https://openalex.org/W2766447205","https://openalex.org/W2736601468","https://openalex.org/W2121863487","https://openalex.org/W2960876848","https://openalex.org/W2892013712","https://openalex.org/W1542941925","https://openalex.org/W2964043796","https://openalex.org/W2963937357","https://openalex.org/W2786036274","https://openalex.org/W2155027007","https://openalex.org/W2145339207","https://openalex.org/W2096145798","https://openalex.org/W1518858799","https://openalex.org/W1192553058"],"abstract_inverted_index":{"In":[0],"this":[1,34,52,137],"paper,":[2],"we":[3,106],"present":[4],"exploitability":[5,37],"descent,":[6],"a":[7,39,59],"new":[8],"algorithm":[9,110],"to":[10,44,58,75,91,121],"compute":[11],"approximate":[12],"equilibria":[13],"in":[14,95,99,115,132],"two-player":[15],"zero-sum":[16],"extensive-form":[17],"games":[18,98,135],"with":[19],"imperfect":[20,133],"information,":[21],"by":[22],"direct":[23],"policy":[24],"optimization":[25],"against":[26],"worst-case":[27],"opponents.":[28],"We":[29],"prove":[30],"that":[31,108],"when":[32,48],"following":[33],"optimization,":[35,53],"the":[36,54,76,82,100,112,118,122,128],"of":[38,117,124,139],"player's":[40],"strategy":[41],"converges":[42],"asymptotically":[43],"zero,":[45],"and":[46,66,93],"hence":[47],"both":[49],"players":[50],"employ":[51],"joint":[55],"policies":[56,77],"converge":[57],"Nash":[60],"equilibrium.":[61],"Unlike":[62],"fictitious":[63],"play":[64],"(XFP)":[65],"counterfactual":[67],"regret":[68],"minimization":[69],"(CFR),":[70],"our":[71,109,125],"convergence":[72,88],"result":[73,131],"pertains":[74],"being":[78],"optimized":[79],"rather":[80],"than":[81],"average":[83],"policies.":[84],"Our":[85],"experiments":[86],"demonstrate":[87],"rates":[89],"comparable":[90],"XFP":[92],"CFR":[94],"four":[96],"benchmark":[97],"tabular":[101,113],"case.":[102],"Using":[103],"function":[104],"approximation,":[105],"find":[107],"outperforms":[111],"version":[114],"two":[116],"games,":[119],"which,":[120],"best":[123],"knowledge,":[126],"is":[127],"first":[129],"such":[130],"information":[134],"among":[136],"class":[138],"algorithms.":[140]},"counts_by_year":[{"year":2023,"cited_by_count":1},{"year":2022,"cited_by_count":3},{"year":2021,"cited_by_count":5},{"year":2020,"cited_by_count":6},{"year":2019,"cited_by_count":2},{"year":2018,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
