{"id":"https://openalex.org/W3093519064","doi":"https://doi.org/10.1109/cog47356.2020.9231589","title":"Manipulating the Distributions of Experience used for Self-Play Learning in Expert Iteration","display_name":"Manipulating the Distributions of Experience used for Self-Play Learning in Expert Iteration","publication_year":2020,"publication_date":"2020-08-01","ids":{"openalex":"https://openalex.org/W3093519064","doi":"https://doi.org/10.1109/cog47356.2020.9231589","mag":"3093519064"},"language":"en","primary_location":{"id":"doi:10.1109/cog47356.2020.9231589","is_oa":false,"landing_page_url":"https://doi.org/10.1109/cog47356.2020.9231589","pdf_url":null,"source":{"id":"https://openalex.org/S4306498668","display_name":"2020 IEEE Conference on Games (CoG)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2020 IEEE Conference on Games (CoG)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://cris.maastrichtuniversity.nl/en/publications/4f0db36d-d906-4270-ab60-98422736e0ea","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5049574317","display_name":"Dennis J. N. J. Soemers","orcid":"https://orcid.org/0000-0003-3241-8957"},"institutions":[{"id":"https://openalex.org/I34352273","display_name":"Maastricht University","ror":"https://ror.org/02jz4aj89","country_code":"NL","type":"education","lineage":["https://openalex.org/I34352273"]}],"countries":["NL"],"is_corresponding":false,"raw_author_name":"Dennis J. N. J. Soemers","raw_affiliation_strings":["Department of Data Science and Knowledge Engineering, Maastricht University, Maastricht, the Netherlands"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Department of Data Science and Knowledge Engineering, Maastricht University, Maastricht, the Netherlands","institution_ids":["https://openalex.org/I34352273"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5061767166","display_name":"\u00c9ric Piette","orcid":"https://orcid.org/0000-0001-8355-636X"},"institutions":[{"id":"https://openalex.org/I34352273","display_name":"Maastricht University","ror":"https://ror.org/02jz4aj89","country_code":"NL","type":"education","lineage":["https://openalex.org/I34352273"]}],"countries":["NL"],"is_corresponding":false,"raw_author_name":"Eric Piette","raw_affiliation_strings":["Department of Data Science and Knowledge Engineering, Maastricht University, Maastricht, the Netherlands"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Department of Data Science and Knowledge Engineering, Maastricht University, Maastricht, the Netherlands","institution_ids":["https://openalex.org/I34352273"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5075491769","display_name":"Matthew Stephenson","orcid":"https://orcid.org/0000-0002-3867-5842"},"institutions":[{"id":"https://openalex.org/I34352273","display_name":"Maastricht University","ror":"https://ror.org/02jz4aj89","country_code":"NL","type":"education","lineage":["https://openalex.org/I34352273"]}],"countries":["NL"],"is_corresponding":false,"raw_author_name":"Matthew Stephenson","raw_affiliation_strings":["Department of Data Science and Knowledge Engineering, Maastricht University, Maastricht, the Netherlands"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Department of Data Science and Knowledge Engineering, Maastricht University, Maastricht, the Netherlands","institution_ids":["https://openalex.org/I34352273"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5065924402","display_name":"Cameron Browne","orcid":"https://orcid.org/0000-0003-2997-3255"},"institutions":[{"id":"https://openalex.org/I34352273","display_name":"Maastricht University","ror":"https://ror.org/02jz4aj89","country_code":"NL","type":"education","lineage":["https://openalex.org/I34352273"]}],"countries":["NL"],"is_corresponding":false,"raw_author_name":"Cameron Browne","raw_affiliation_strings":["Department of Data Science and Knowledge Engineering, Maastricht University, Maastricht, the Netherlands"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Department of Data Science and Knowledge Engineering, Maastricht University, Maastricht, the Netherlands","institution_ids":["https://openalex.org/I34352273"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.446,"has_fulltext":false,"cited_by_count":6,"citation_normalized_percentile":{"value":0.66274442,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":97},"biblio":{"volume":null,"issue":null,"first_page":"245","last_page":"252"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11574","display_name":"Artificial Intelligence in Games","score":0.9995999932289124,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11574","display_name":"Artificial Intelligence in Games","score":0.9995999932289124,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11674","display_name":"Sports Analytics and Performance","score":0.9975000023841858,"subfield":{"id":"https://openalex.org/subfields/2002","display_name":"Economics and Econometrics"},"field":{"id":"https://openalex.org/fields/20","display_name":"Economics, Econometrics and Finance"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9959999918937683,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/monte-carlo-tree-search","display_name":"Monte Carlo tree search","score":0.7513625025749207},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7424100637435913},{"id":"https://openalex.org/keywords/tree","display_name":"Tree (set theory)","score":0.6421692967414856},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.5891677737236023},{"id":"https://openalex.org/keywords/policy-learning","display_name":"Policy learning","score":0.529512882232666},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5204423069953918},{"id":"https://openalex.org/keywords/exploratory-search","display_name":"Exploratory search","score":0.43357062339782715},{"id":"https://openalex.org/keywords/sampling","display_name":"Sampling (signal processing)","score":0.42025452852249146},{"id":"https://openalex.org/keywords/monte-carlo-method","display_name":"Monte Carlo method","score":0.30894553661346436},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.12951955199241638},{"id":"https://openalex.org/keywords/statistics","display_name":"Statistics","score":0.10242509841918945}],"concepts":[{"id":"https://openalex.org/C46149586","wikidata":"https://www.wikidata.org/wiki/Q11785332","display_name":"Monte Carlo tree search","level":3,"score":0.7513625025749207},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7424100637435913},{"id":"https://openalex.org/C113174947","wikidata":"https://www.wikidata.org/wiki/Q2859736","display_name":"Tree (set theory)","level":2,"score":0.6421692967414856},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.5891677737236023},{"id":"https://openalex.org/C2779436431","wikidata":"https://www.wikidata.org/wiki/Q30672407","display_name":"Policy learning","level":2,"score":0.529512882232666},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5204423069953918},{"id":"https://openalex.org/C2777866876","wikidata":"https://www.wikidata.org/wiki/Q5421358","display_name":"Exploratory search","level":2,"score":0.43357062339782715},{"id":"https://openalex.org/C140779682","wikidata":"https://www.wikidata.org/wiki/Q210868","display_name":"Sampling (signal processing)","level":3,"score":0.42025452852249146},{"id":"https://openalex.org/C19499675","wikidata":"https://www.wikidata.org/wiki/Q232207","display_name":"Monte Carlo method","level":2,"score":0.30894553661346436},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.12951955199241638},{"id":"https://openalex.org/C105795698","wikidata":"https://www.wikidata.org/wiki/Q12483","display_name":"Statistics","level":1,"score":0.10242509841918945},{"id":"https://openalex.org/C134306372","wikidata":"https://www.wikidata.org/wiki/Q7754","display_name":"Mathematical analysis","level":1,"score":0.0},{"id":"https://openalex.org/C106131492","wikidata":"https://www.wikidata.org/wiki/Q3072260","display_name":"Filter (signal processing)","level":2,"score":0.0},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.0}],"mesh":[],"locations_count":4,"locations":[{"id":"doi:10.1109/cog47356.2020.9231589","is_oa":false,"landing_page_url":"https://doi.org/10.1109/cog47356.2020.9231589","pdf_url":null,"source":{"id":"https://openalex.org/S4306498668","display_name":"2020 IEEE Conference on Games (CoG)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2020 IEEE Conference on Games (CoG)","raw_type":"proceedings-article"},{"id":"pmh:oai:cris.maastrichtuniversity.nl:openaire/4f0db36d-d906-4270-ab60-98422736e0ea","is_oa":true,"landing_page_url":"https://cris.maastrichtuniversity.nl/en/publications/4f0db36d-d906-4270-ab60-98422736e0ea","pdf_url":null,"source":{"id":"https://openalex.org/S4306402616","display_name":"Research Publications (Maastricht University)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I34352273","host_organization_name":"Maastricht University","host_organization_lineage":["https://openalex.org/I34352273"],"host_organization_lineage_names":[],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Soemers, D, Piette, E, Stephenson, M & Browne, C 2020, Manipulating the Distributions of Experience used for Self-Play Learning in Expert Iteration. in IEEE Conference on Games : (CoG'20). IEEE, Osaka, Japan, IEEE Conference on Computational Intelligence and Games, pp. 245-252, 2020 IEEE Conference on Games (CoG), 24/08/20. https://doi.org/10.1109/CoG47356.2020.9231589","raw_type":"info:eu-repo/semantics/publishedVersion"},{"id":"pmh:oai:cris.maastrichtuniversity.nl:publications/4f0db36d-d906-4270-ab60-98422736e0ea","is_oa":true,"landing_page_url":"https://ieeexplore.ieee.org/document/9231589","pdf_url":null,"source":{"id":"https://openalex.org/S4306402616","display_name":"Research Publications (Maastricht University)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I34352273","host_organization_name":"Maastricht University","host_organization_lineage":["https://openalex.org/I34352273"],"host_organization_lineage_names":[],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Soemers, D, Piette, E, Stephenson, M & Browne, C 2020, Manipulating the Distributions of Experience used for Self-Play Learning in Expert Iteration. in IEEE Conference on Games : (CoG'20). IEEE, Osaka, Japan, IEEE Conference on Computational Intelligence and Games, pp. 245-252, 2020 IEEE Conference on Games (CoG), 24/08/20. https://doi.org/10.1109/CoG47356.2020.9231589","raw_type":"info:eu-repo/semantics/publishedVersion"},{"id":"pmh:oai:dial.uclouvain.be:boreal:276828","is_oa":true,"landing_page_url":"http://hdl.handle.net/2078/276828","pdf_url":null,"source":{"id":"https://openalex.org/S4306401902","display_name":"Digital Access to Libraries (Universit\u00e9 catholique de Louvain (UCL), l'Universit\u00e9 de Namur (UNamur) and the Universit\u00e9 Saint-Louis (USL-B))","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I95674353","host_organization_name":"UCLouvain","host_organization_lineage":["https://openalex.org/I95674353"],"host_organization_lineage_names":[],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"info:eu-repo/semantics/conferenceObject"}],"best_oa_location":{"id":"pmh:oai:cris.maastrichtuniversity.nl:openaire/4f0db36d-d906-4270-ab60-98422736e0ea","is_oa":true,"landing_page_url":"https://cris.maastrichtuniversity.nl/en/publications/4f0db36d-d906-4270-ab60-98422736e0ea","pdf_url":null,"source":{"id":"https://openalex.org/S4306402616","display_name":"Research Publications (Maastricht University)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I34352273","host_organization_name":"Maastricht University","host_organization_lineage":["https://openalex.org/I34352273"],"host_organization_lineage_names":[],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Soemers, D, Piette, E, Stephenson, M & Browne, C 2020, Manipulating the Distributions of Experience used for Self-Play Learning in Expert Iteration. in IEEE Conference on Games : (CoG'20). IEEE, Osaka, Japan, IEEE Conference on Computational Intelligence and Games, pp. 245-252, 2020 IEEE Conference on Games (CoG), 24/08/20. https://doi.org/10.1109/CoG47356.2020.9231589","raw_type":"info:eu-repo/semantics/publishedVersion"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":56,"referenced_works":["https://openalex.org/W41554520","https://openalex.org/W65193931","https://openalex.org/W1514587017","https://openalex.org/W1600046456","https://openalex.org/W1625390266","https://openalex.org/W1714211023","https://openalex.org/W1810943226","https://openalex.org/W2014512216","https://openalex.org/W2063471322","https://openalex.org/W2107741520","https://openalex.org/W2119717200","https://openalex.org/W2121863487","https://openalex.org/W2126316555","https://openalex.org/W2145339207","https://openalex.org/W2147632348","https://openalex.org/W2201581102","https://openalex.org/W2257979135","https://openalex.org/W2294013160","https://openalex.org/W2618097077","https://openalex.org/W2761873684","https://openalex.org/W2766447205","https://openalex.org/W2810602713","https://openalex.org/W2889632674","https://openalex.org/W2902907165","https://openalex.org/W2905342215","https://openalex.org/W2913118860","https://openalex.org/W2913687272","https://openalex.org/W2918318309","https://openalex.org/W2925418831","https://openalex.org/W2943930939","https://openalex.org/W2950137412","https://openalex.org/W2963477884","https://openalex.org/W2963642149","https://openalex.org/W2964291307","https://openalex.org/W2967349019","https://openalex.org/W2969287672","https://openalex.org/W2977093897","https://openalex.org/W2982316857","https://openalex.org/W3020882730","https://openalex.org/W3089514910","https://openalex.org/W3099518626","https://openalex.org/W3103831245","https://openalex.org/W4213251304","https://openalex.org/W6630907848","https://openalex.org/W6635798015","https://openalex.org/W6638273328","https://openalex.org/W6676320248","https://openalex.org/W6687681856","https://openalex.org/W6738855998","https://openalex.org/W6744838376","https://openalex.org/W6757988111","https://openalex.org/W6758829138","https://openalex.org/W6758872754","https://openalex.org/W6759832207","https://openalex.org/W6762220698","https://openalex.org/W6766749119"],"related_works":["https://openalex.org/W2571592646","https://openalex.org/W4247855592","https://openalex.org/W2567165815","https://openalex.org/W2740304877","https://openalex.org/W4226164546","https://openalex.org/W2766259847","https://openalex.org/W2112583639","https://openalex.org/W2946273706","https://openalex.org/W346633817","https://openalex.org/W2563266764"],"abstract_inverted_index":{"Expert":[0],"Iteration":[1],"(ExIt)":[2],"is":[3,119,143],"an":[4],"effective":[5],"framework":[6],"for":[7,74,89],"learning":[8,90],"game-playing":[9],"policies":[10],"from":[11,81,92,129],"self-play.":[12,151],"ExIt":[13,123],"involves":[14],"training":[15,136,161,175],"a":[16,24,139],"policy":[17,39,44,142],"to":[18,40,125,133,145],"mimic":[19],"the":[20,37,46,63,76,84,93,104,107,122,147,155],"search":[21,26,33,48,66],"behaviour":[22],"of":[23,62,78,106,157],"tree":[25,32,47,65],"algorithm":[27],"--":[28,34],"such":[29],"as":[30],"Monte-Carlo":[31],"and":[35,45,83,180],"using":[36],"trained":[38,140],"guide":[41],"it.":[42],"The":[43],"can":[49],"then":[50],"iteratively":[51],"improve":[52],"each":[53],"other,":[54],"through":[55],"experience":[56,128],"gathered":[57],"in":[58,98,109,150,164,173,177],"self-play":[59],"between":[60],"instances":[61],"guided":[64],"algorithm.":[67],"This":[68,152],"paper":[69,153],"outlines":[70],"three":[71],"different":[72,166],"approaches":[73],"manipulating":[75],"distribution":[77],"data":[79],"collected":[80,94],"self-play,":[82],"procedure":[85],"that":[86],"samples":[87,97],"batches":[88,99],"updates":[91],"data.":[95],"Firstly,":[96],"are":[100],"weighted":[101],"based":[102],"on":[103,160],"durations":[105],"episodes":[108],"which":[110,130],"they":[111],"were":[112],"originally":[113],"experienced.":[114],"Secondly,":[115],"Prioritized":[116],"Experience":[117],"Replay":[118],"applied":[120],"within":[121],"framework,":[124],"prioritise":[126],"sampling":[127],"we":[131],"expect":[132],"obtain":[134],"valuable":[135],"signals.":[137],"Thirdly,":[138],"exploratory":[141],"used":[144],"diversify":[146],"trajectories":[148],"experienced":[149],"summarises":[154],"effects":[156],"these":[158],"manipulations":[159],"performance":[162,176],"evaluated":[163],"fourteen":[165,185],"board":[167],"games.":[168,186],"We":[169],"find":[170],"major":[171],"improvements":[172,182],"early":[174],"some":[178],"games,":[179],"minor":[181],"averaged":[183],"over":[184]},"counts_by_year":[{"year":2024,"cited_by_count":2},{"year":2023,"cited_by_count":3},{"year":2022,"cited_by_count":1}],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2025-10-10T00:00:00"}