{"id":"https://openalex.org/W2945417546","doi":"https://doi.org/10.1109/cig.2019.8848037","title":"Learning Policies from Self-Play with Policy Gradients and MCTS Value Estimates","display_name":"Learning Policies from Self-Play with Policy Gradients and MCTS Value Estimates","publication_year":2019,"publication_date":"2019-08-01","ids":{"openalex":"https://openalex.org/W2945417546","doi":"https://doi.org/10.1109/cig.2019.8848037","mag":"2945417546"},"language":"en","primary_location":{"id":"doi:10.1109/cig.2019.8848037","is_oa":false,"landing_page_url":"https://doi.org/10.1109/cig.2019.8848037","pdf_url":null,"source":{"id":"https://openalex.org/S4306498491","display_name":"2019 IEEE Conference on Games (CoG)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2019 IEEE Conference on Games (CoG)","raw_type":"proceedings-article"},"type":"article","indexed_in":["arxiv","crossref","datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://cris.maastrichtuniversity.nl/en/publications/4b0906b7-049c-43ea-adb4-93ac7dd61470","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5049574317","display_name":"Dennis J. N. J. Soemers","orcid":"https://orcid.org/0000-0003-3241-8957"},"institutions":[{"id":"https://openalex.org/I34352273","display_name":"Maastricht University","ror":"https://ror.org/02jz4aj89","country_code":"NL","type":"education","lineage":["https://openalex.org/I34352273"]}],"countries":["NL"],"is_corresponding":true,"raw_author_name":"Dennis J. N. J. Soemers","raw_affiliation_strings":["Department of Data Science and Knowledge Engineering, Maastricht University, Maastricht, the Netherlands","Maastricht University"],"affiliations":[{"raw_affiliation_string":"Department of Data Science and Knowledge Engineering, Maastricht University, Maastricht, the Netherlands","institution_ids":["https://openalex.org/I34352273"]},{"raw_affiliation_string":"Maastricht University","institution_ids":["https://openalex.org/I34352273"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5061767166","display_name":"\u00c9ric Piette","orcid":"https://orcid.org/0000-0001-8355-636X"},"institutions":[{"id":"https://openalex.org/I34352273","display_name":"Maastricht University","ror":"https://ror.org/02jz4aj89","country_code":"NL","type":"education","lineage":["https://openalex.org/I34352273"]}],"countries":["NL"],"is_corresponding":false,"raw_author_name":"Eric Piette","raw_affiliation_strings":["Department of Data Science and Knowledge Engineering, Maastricht University, Maastricht, the Netherlands","Maastricht University"],"affiliations":[{"raw_affiliation_string":"Department of Data Science and Knowledge Engineering, Maastricht University, Maastricht, the Netherlands","institution_ids":["https://openalex.org/I34352273"]},{"raw_affiliation_string":"Maastricht University","institution_ids":["https://openalex.org/I34352273"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5075491769","display_name":"Matthew Stephenson","orcid":"https://orcid.org/0000-0002-3867-5842"},"institutions":[{"id":"https://openalex.org/I34352273","display_name":"Maastricht University","ror":"https://ror.org/02jz4aj89","country_code":"NL","type":"education","lineage":["https://openalex.org/I34352273"]}],"countries":["NL"],"is_corresponding":false,"raw_author_name":"Matthew Stephenson","raw_affiliation_strings":["Department of Data Science and Knowledge Engineering, Maastricht University, Maastricht, the Netherlands","Maastricht University"],"affiliations":[{"raw_affiliation_string":"Department of Data Science and Knowledge Engineering, Maastricht University, Maastricht, the Netherlands","institution_ids":["https://openalex.org/I34352273"]},{"raw_affiliation_string":"Maastricht University","institution_ids":["https://openalex.org/I34352273"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5065924402","display_name":"Cameron Browne","orcid":"https://orcid.org/0000-0003-2997-3255"},"institutions":[{"id":"https://openalex.org/I34352273","display_name":"Maastricht University","ror":"https://ror.org/02jz4aj89","country_code":"NL","type":"education","lineage":["https://openalex.org/I34352273"]}],"countries":["NL"],"is_corresponding":false,"raw_author_name":"Cameron Browne","raw_affiliation_strings":["Department of Data Science and Knowledge Engineering, Maastricht University, Maastricht, the Netherlands","Maastricht University"],"affiliations":[{"raw_affiliation_string":"Department of Data Science and Knowledge Engineering, Maastricht University, Maastricht, the Netherlands","institution_ids":["https://openalex.org/I34352273"]},{"raw_affiliation_string":"Maastricht University","institution_ids":["https://openalex.org/I34352273"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5049574317"],"corresponding_institution_ids":["https://openalex.org/I34352273"],"apc_list":null,"apc_paid":null,"fwci":0.1333,"has_fulltext":false,"cited_by_count":1,"citation_normalized_percentile":{"value":0.4621097,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":94},"biblio":{"volume":"529","issue":null,"first_page":"1","last_page":"8"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11574","display_name":"Artificial Intelligence in Games","score":0.9419999718666077,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11574","display_name":"Artificial Intelligence in Games","score":0.9419999718666077,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.04050000011920929,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10906","display_name":"AI-based Problem Solving and Planning","score":0.00279999990016222,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/monte-carlo-tree-search","display_name":"Monte Carlo tree search","score":0.8145104050636292},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7416683435440063},{"id":"https://openalex.org/keywords/variety","display_name":"Variety (cybernetics)","score":0.695284903049469},{"id":"https://openalex.org/keywords/function","display_name":"Function (biology)","score":0.5304208397865295},{"id":"https://openalex.org/keywords/bellman-equation","display_name":"Bellman equation","score":0.4648735821247101},{"id":"https://openalex.org/keywords/value","display_name":"Value (mathematics)","score":0.45212841033935547},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.4386027455329895},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.4118267893791199},{"id":"https://openalex.org/keywords/monte-carlo-method","display_name":"Monte Carlo method","score":0.1969468593597412},{"id":"https://openalex.org/keywords/mathematical-optimization","display_name":"Mathematical optimization","score":0.16168174147605896},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.1198708713054657}],"concepts":[{"id":"https://openalex.org/C46149586","wikidata":"https://www.wikidata.org/wiki/Q11785332","display_name":"Monte Carlo tree search","level":3,"score":0.8145104050636292},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7416683435440063},{"id":"https://openalex.org/C136197465","wikidata":"https://www.wikidata.org/wiki/Q1729295","display_name":"Variety (cybernetics)","level":2,"score":0.695284903049469},{"id":"https://openalex.org/C14036430","wikidata":"https://www.wikidata.org/wiki/Q3736076","display_name":"Function (biology)","level":2,"score":0.5304208397865295},{"id":"https://openalex.org/C14646407","wikidata":"https://www.wikidata.org/wiki/Q1430750","display_name":"Bellman equation","level":2,"score":0.4648735821247101},{"id":"https://openalex.org/C2776291640","wikidata":"https://www.wikidata.org/wiki/Q2912517","display_name":"Value (mathematics)","level":2,"score":0.45212841033935547},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4386027455329895},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.4118267893791199},{"id":"https://openalex.org/C19499675","wikidata":"https://www.wikidata.org/wiki/Q232207","display_name":"Monte Carlo method","level":2,"score":0.1969468593597412},{"id":"https://openalex.org/C126255220","wikidata":"https://www.wikidata.org/wiki/Q141495","display_name":"Mathematical optimization","level":1,"score":0.16168174147605896},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.1198708713054657},{"id":"https://openalex.org/C78458016","wikidata":"https://www.wikidata.org/wiki/Q840400","display_name":"Evolutionary biology","level":1,"score":0.0},{"id":"https://openalex.org/C105795698","wikidata":"https://www.wikidata.org/wiki/Q12483","display_name":"Statistics","level":1,"score":0.0},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0}],"mesh":[],"locations_count":6,"locations":[{"id":"doi:10.1109/cig.2019.8848037","is_oa":false,"landing_page_url":"https://doi.org/10.1109/cig.2019.8848037","pdf_url":null,"source":{"id":"https://openalex.org/S4306498491","display_name":"2019 IEEE Conference on Games (CoG)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2019 IEEE Conference on Games (CoG)","raw_type":"proceedings-article"},{"id":"pmh:oai:cris.maastrichtuniversity.nl:openaire/4b0906b7-049c-43ea-adb4-93ac7dd61470","is_oa":true,"landing_page_url":"https://cris.maastrichtuniversity.nl/en/publications/4b0906b7-049c-43ea-adb4-93ac7dd61470","pdf_url":null,"source":{"id":"https://openalex.org/S4306402616","display_name":"Research Publications (Maastricht University)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I34352273","host_organization_name":"Maastricht University","host_organization_lineage":["https://openalex.org/I34352273"],"host_organization_lineage_names":[],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Soemers, D, Piette, E, Stephenson, M & Browne, C 2019, Learning Policies from Self-Play with Policy Gradients and MCTS Value Estimates. in IEEE Conference on Games : (CoG'19). IEEE, IEEE Conference on Computational Intelligence and Games, pp. 329-336, IEEE Conference on Games (IEEE COG), London, United Kingdom, 20/08/19. https://doi.org/10.1109/CIG.2019.8848037","raw_type":"info:eu-repo/semantics/publishedVersion"},{"id":"pmh:oai:arXiv.org:1905.05809","is_oa":true,"landing_page_url":"http://arxiv.org/abs/1905.05809","pdf_url":"https://arxiv.org/pdf/1905.05809","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},{"id":"pmh:oai:HAL:hal-03594863v1","is_oa":false,"landing_page_url":"https://hal.science/hal-03594863","pdf_url":null,"source":{"id":"https://openalex.org/S4306402512","display_name":"HAL (Le Centre pour la Communication Scientifique Directe)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1294671590","host_organization_name":"Centre National de la Recherche Scientifique","host_organization_lineage":["https://openalex.org/I1294671590"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"2019 IEEE Conference On Games, Aug 2019, London, United Kingdom","raw_type":"Conference papers"},{"id":"pmh:oai:dial.uclouvain.be:boreal:276823","is_oa":true,"landing_page_url":"http://hdl.handle.net/2078/276823","pdf_url":null,"source":{"id":"https://openalex.org/S4306401902","display_name":"Digital Access to Libraries (Universit\u00e9 catholique de Louvain (UCL), l'Universit\u00e9 de Namur (UNamur) and the Universit\u00e9 Saint-Louis (USL-B))","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I95674353","host_organization_name":"UCLouvain","host_organization_lineage":["https://openalex.org/I95674353"],"host_organization_lineage_names":[],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"info:eu-repo/semantics/conferenceObject"},{"id":"doi:10.48550/arxiv.1905.05809","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.1905.05809","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"pmh:oai:cris.maastrichtuniversity.nl:openaire/4b0906b7-049c-43ea-adb4-93ac7dd61470","is_oa":true,"landing_page_url":"https://cris.maastrichtuniversity.nl/en/publications/4b0906b7-049c-43ea-adb4-93ac7dd61470","pdf_url":null,"source":{"id":"https://openalex.org/S4306402616","display_name":"Research Publications (Maastricht University)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I34352273","host_organization_name":"Maastricht University","host_organization_lineage":["https://openalex.org/I34352273"],"host_organization_lineage_names":[],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Soemers, D, Piette, E, Stephenson, M & Browne, C 2019, Learning Policies from Self-Play with Policy Gradients and MCTS Value Estimates. in IEEE Conference on Games : (CoG'19). IEEE, IEEE Conference on Computational Intelligence and Games, pp. 329-336, IEEE Conference on Games (IEEE COG), London, United Kingdom, 20/08/19. https://doi.org/10.1109/CIG.2019.8848037","raw_type":"info:eu-repo/semantics/publishedVersion"},"sustainable_development_goals":[],"awards":[],"funders":[{"id":"https://openalex.org/F4320321016","display_name":"Universiteit Maastricht","ror":"https://ror.org/02jz4aj89"}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":47,"referenced_works":["https://openalex.org/W58433569","https://openalex.org/W1500868819","https://openalex.org/W1625390266","https://openalex.org/W1714211023","https://openalex.org/W1810943226","https://openalex.org/W1888434271","https://openalex.org/W1995945562","https://openalex.org/W2020135152","https://openalex.org/W2119717200","https://openalex.org/W2121863487","https://openalex.org/W2126316555","https://openalex.org/W2128020951","https://openalex.org/W2134400955","https://openalex.org/W2149523275","https://openalex.org/W2153039919","https://openalex.org/W2154291838","https://openalex.org/W2155027007","https://openalex.org/W2156347136","https://openalex.org/W2168405694","https://openalex.org/W2257979135","https://openalex.org/W2466732996","https://openalex.org/W2468569233","https://openalex.org/W2589891087","https://openalex.org/W2646036860","https://openalex.org/W2753511062","https://openalex.org/W2766447205","https://openalex.org/W2778917778","https://openalex.org/W2797563284","https://openalex.org/W2896888044","https://openalex.org/W2902907165","https://openalex.org/W2913687272","https://openalex.org/W2962821147","https://openalex.org/W2962862931","https://openalex.org/W2963120839","https://openalex.org/W2963642149","https://openalex.org/W2967349019","https://openalex.org/W6627932998","https://openalex.org/W6638273328","https://openalex.org/W6683001934","https://openalex.org/W6683204974","https://openalex.org/W6737947904","https://openalex.org/W6738855998","https://openalex.org/W6739899130","https://openalex.org/W6744039304","https://openalex.org/W6744123322","https://openalex.org/W6756287877","https://openalex.org/W6758829138"],"related_works":["https://openalex.org/W2908726240","https://openalex.org/W2757711011","https://openalex.org/W2735649811","https://openalex.org/W3213301335","https://openalex.org/W2986077564","https://openalex.org/W3081517182","https://openalex.org/W3008543765","https://openalex.org/W3129404076","https://openalex.org/W2903764415","https://openalex.org/W2529439","https://openalex.org/W2044007646","https://openalex.org/W318938809","https://openalex.org/W2043424863","https://openalex.org/W2535343491","https://openalex.org/W2894672498","https://openalex.org/W2184830374","https://openalex.org/W1576354143","https://openalex.org/W2134345668","https://openalex.org/W2951755790","https://openalex.org/W3125544878"],"abstract_inverted_index":{"In":[0,75],"recent":[1],"years,":[2],"state-of-the-art":[3,98],"game-playing":[4,99],"agents":[5],"often":[6],"involve":[7],"policies":[8,24,36,60,83,123],"that":[9,106,124],"are":[10,37,65,79,125],"trained":[11,23,38,61],"in":[12,62,81,160],"self-playing":[13],"processes":[14],"where":[15],"Monte":[16],"Carlo":[17],"tree":[18],"search":[19,42],"(MCTS)":[20],"algorithms":[21],"and":[22,114],"iteratively":[25],"improve":[26],"each":[27],"other.":[28],"The":[29],"strongest":[30],"results":[31],"have":[32],"been":[33],"obtained":[34],"when":[35],"to":[39,68],"mimic":[40],"the":[41,91],"behaviour":[43],"of":[44,58,73,93,110,157,163],"MCTS":[45,144,149],"by":[46,53],"minimising":[47],"a":[48,70,85,117,130,161],"cross-entropy":[49],"loss.":[50],"Because":[51],"MCTS,":[52],"design,":[54],"includes":[55],"an":[56,108],"element":[57],"exploration,":[59],"this":[63,76,136],"manner":[64],"also":[66],"likely":[67],"exhibit":[69],"similar":[71],"extent":[72,109],"exploration.":[74],"paper,":[77],"we":[78,104,115],"interested":[80],"learning":[82],"for":[84,121,134],"project":[86],"with":[87],"future":[88],"goals":[89],"including":[90],"extraction":[92],"interpretable":[94],"strategies,":[95],"rather":[96,147],"than":[97,148],"performance.":[100],"For":[101],"these":[102],"goals,":[103],"argue":[105],"such":[107],"exploration":[111],"is":[112],"undesirable,":[113],"propose":[116],"novel":[118],"objective":[119,137],"function":[120],"training":[122],"not":[126],"exploratory.":[127],"We":[128,152],"derive":[129],"policy":[131],"gradient":[132],"expression":[133],"maximising":[135],"function,":[138],"which":[139],"can":[140],"be":[141],"estimated":[142],"using":[143],"value":[145],"estimates,":[146],"visit":[150],"counts.":[151],"empirically":[153],"evaluate":[154],"various":[155],"properties":[156],"resulting":[158],"policies,":[159],"variety":[162],"board":[164],"games.":[165]},"counts_by_year":[{"year":2022,"cited_by_count":1}],"updated_date":"2026-03-20T23:20:44.827607","created_date":"2025-10-10T00:00:00"}
