{"id":"https://openalex.org/W2145092134","doi":"https://doi.org/10.1109/cig.2011.6032005","title":"Using the online cross-entropy method to learn relational policies for playing different games","display_name":"Using the online cross-entropy method to learn relational policies for playing different games","publication_year":2011,"publication_date":"2011-08-01","ids":{"openalex":"https://openalex.org/W2145092134","doi":"https://doi.org/10.1109/cig.2011.6032005","mag":"2145092134"},"language":"en","primary_location":{"id":"doi:10.1109/cig.2011.6032005","is_oa":false,"landing_page_url":"https://doi.org/10.1109/cig.2011.6032005","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2011 IEEE Conference on Computational Intelligence and Games (CIG'11)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://hdl.handle.net/10289/5837","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5005207564","display_name":"Samuel Sarjant","orcid":"https://orcid.org/0000-0002-9478-5835"},"institutions":[{"id":"https://openalex.org/I52179390","display_name":"University of Waikato","ror":"https://ror.org/013fsnh78","country_code":"NZ","type":"education","lineage":["https://openalex.org/I52179390"]}],"countries":["NZ"],"is_corresponding":true,"raw_author_name":"Samuel Sarjant","raw_affiliation_strings":["Faculty of Computing and Mathematical Sciences, University of Waikato, New Zealand","Faculty of Computing and Mathematical Sciences at The University of Waikato, New Zealand"],"affiliations":[{"raw_affiliation_string":"Faculty of Computing and Mathematical Sciences, University of Waikato, New Zealand","institution_ids":["https://openalex.org/I52179390"]},{"raw_affiliation_string":"Faculty of Computing and Mathematical Sciences at The University of Waikato, New Zealand","institution_ids":["https://openalex.org/I52179390"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5087785022","display_name":"Bernhard Pfahringer","orcid":"https://orcid.org/0000-0002-3732-5787"},"institutions":[{"id":"https://openalex.org/I52179390","display_name":"University of Waikato","ror":"https://ror.org/013fsnh78","country_code":"NZ","type":"education","lineage":["https://openalex.org/I52179390"]}],"countries":["NZ"],"is_corresponding":false,"raw_author_name":"Bernhard Pfahringer","raw_affiliation_strings":["Faculty of Computing and Mathematical Sciences, University of Waikato, New Zealand","Faculty of Computing and Mathematical Sciences at The University of Waikato, New Zealand"],"affiliations":[{"raw_affiliation_string":"Faculty of Computing and Mathematical Sciences, University of Waikato, New Zealand","institution_ids":["https://openalex.org/I52179390"]},{"raw_affiliation_string":"Faculty of Computing and Mathematical Sciences at The University of Waikato, New Zealand","institution_ids":["https://openalex.org/I52179390"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5002817190","display_name":"Kurt Driessens","orcid":"https://orcid.org/0000-0001-7871-2495"},"institutions":[{"id":"https://openalex.org/I34352273","display_name":"Maastricht University","ror":"https://ror.org/02jz4aj89","country_code":"NL","type":"education","lineage":["https://openalex.org/I34352273"]}],"countries":["NL"],"is_corresponding":false,"raw_author_name":"Kurt Driessens","raw_affiliation_strings":["Department of Knowledge Engineering, Maastricht University, Netherlands","Department of Knowledge Engineering at Maastricht University, The Netherlands"],"affiliations":[{"raw_affiliation_string":"Department of Knowledge Engineering, Maastricht University, Netherlands","institution_ids":["https://openalex.org/I34352273"]},{"raw_affiliation_string":"Department of Knowledge Engineering at Maastricht University, The Netherlands","institution_ids":["https://openalex.org/I34352273"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5027185136","display_name":"Tony C. Smith","orcid":null},"institutions":[{"id":"https://openalex.org/I52179390","display_name":"University of Waikato","ror":"https://ror.org/013fsnh78","country_code":"NZ","type":"education","lineage":["https://openalex.org/I52179390"]}],"countries":["NZ"],"is_corresponding":false,"raw_author_name":"Tony Smith","raw_affiliation_strings":["Faculty of Computing and Mathematical Sciences, University of Waikato, New Zealand","Faculty of Computing and Mathematical Sciences at The University of Waikato, New Zealand"],"affiliations":[{"raw_affiliation_string":"Faculty of Computing and Mathematical Sciences, University of Waikato, New Zealand","institution_ids":["https://openalex.org/I52179390"]},{"raw_affiliation_string":"Faculty of Computing and Mathematical Sciences at The University of Waikato, New Zealand","institution_ids":["https://openalex.org/I52179390"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5005207564"],"corresponding_institution_ids":["https://openalex.org/I52179390"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":true,"cited_by_count":4,"citation_normalized_percentile":{"value":0.13362591,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":96},"biblio":{"volume":"801 1988 abs","issue":null,"first_page":"182","last_page":"189"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9922000169754028,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9922000169754028,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11574","display_name":"Artificial Intelligence in Games","score":0.9922000169754028,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12761","display_name":"Data Stream Mining Techniques","score":0.90420001745224,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7752664089202881},{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.7566463351249695},{"id":"https://openalex.org/keywords/entropy","display_name":"Entropy (arrow of time)","score":0.5921730995178223},{"id":"https://openalex.org/keywords/cross-entropy-method","display_name":"Cross-entropy method","score":0.5527582764625549},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5499593019485474},{"id":"https://openalex.org/keywords/set","display_name":"Set (abstract data type)","score":0.50115966796875},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.4481043219566345},{"id":"https://openalex.org/keywords/online-learning","display_name":"Online learning","score":0.4280911982059479},{"id":"https://openalex.org/keywords/statistical-relational-learning","display_name":"Statistical relational learning","score":0.412430077791214},{"id":"https://openalex.org/keywords/relational-database","display_name":"Relational database","score":0.30091971158981323},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.23849251866340637},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.1853860318660736},{"id":"https://openalex.org/keywords/multimedia","display_name":"Multimedia","score":0.10567235946655273}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7752664089202881},{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.7566463351249695},{"id":"https://openalex.org/C106301342","wikidata":"https://www.wikidata.org/wiki/Q4117933","display_name":"Entropy (arrow of time)","level":2,"score":0.5921730995178223},{"id":"https://openalex.org/C75782508","wikidata":"https://www.wikidata.org/wiki/Q3333633","display_name":"Cross-entropy method","level":4,"score":0.5527582764625549},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5499593019485474},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.50115966796875},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.4481043219566345},{"id":"https://openalex.org/C2986087404","wikidata":"https://www.wikidata.org/wiki/Q15946010","display_name":"Online learning","level":2,"score":0.4280911982059479},{"id":"https://openalex.org/C177877439","wikidata":"https://www.wikidata.org/wiki/Q7604413","display_name":"Statistical relational learning","level":3,"score":0.412430077791214},{"id":"https://openalex.org/C5655090","wikidata":"https://www.wikidata.org/wiki/Q192588","display_name":"Relational database","level":2,"score":0.30091971158981323},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.23849251866340637},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.1853860318660736},{"id":"https://openalex.org/C49774154","wikidata":"https://www.wikidata.org/wiki/Q131765","display_name":"Multimedia","level":1,"score":0.10567235946655273},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C98036226","wikidata":"https://www.wikidata.org/wiki/Q7268356","display_name":"Quadratic assignment problem","level":3,"score":0.0},{"id":"https://openalex.org/C52692508","wikidata":"https://www.wikidata.org/wiki/Q1333872","display_name":"Combinatorial optimization","level":2,"score":0.0},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0}],"mesh":[],"locations_count":3,"locations":[{"id":"doi:10.1109/cig.2011.6032005","is_oa":false,"landing_page_url":"https://doi.org/10.1109/cig.2011.6032005","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2011 IEEE Conference on Computational Intelligence and Games (CIG'11)","raw_type":"proceedings-article"},{"id":"pmh:oai:cris.maastrichtuniversity.nl:openaire_cris_publications/4c2d237b-43ed-46f3-8bef-c0ab1eba9d05","is_oa":false,"landing_page_url":"https://cris.maastrichtuniversity.nl/en/publications/4c2d237b-43ed-46f3-8bef-c0ab1eba9d05","pdf_url":null,"source":{"id":"https://openalex.org/S4306402616","display_name":"Research Publications (Maastricht University)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I34352273","host_organization_name":"Maastricht University","host_organization_lineage":["https://openalex.org/I34352273"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Sarjant, S, Pfahringer, B, Driessens, K & Smith, T 2011, Using the online cross-entropy method to learn relational policies for playing different games. in IEEE Conference on Computational Intelligence and Games (CIG 2011). Seoul, South Korea, pp. 182-189. https://doi.org/10.1109/CIG.2011.6032005","raw_type":"info:eu-repo/semantics/publishedVersion"},{"id":"pmh:oai:researchcommons.waikato.ac.nz:10289/5837","is_oa":true,"landing_page_url":"https://hdl.handle.net/10289/5837","pdf_url":"https://hdl.handle.net/10289/5837","source":{"id":"https://openalex.org/S4306400944","display_name":"Research Commons (University of Waikato)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I52179390","host_organization_name":"University of Waikato","host_organization_lineage":["https://openalex.org/I52179390"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"CIG 2011","raw_type":"Conference Contribution"}],"best_oa_location":{"id":"pmh:oai:researchcommons.waikato.ac.nz:10289/5837","is_oa":true,"landing_page_url":"https://hdl.handle.net/10289/5837","pdf_url":"https://hdl.handle.net/10289/5837","source":{"id":"https://openalex.org/S4306400944","display_name":"Research Commons (University of Waikato)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I52179390","host_organization_name":"University of Waikato","host_organization_lineage":["https://openalex.org/I52179390"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"CIG 2011","raw_type":"Conference Contribution"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":true,"grobid_xml":true},"content_urls":{"pdf":"https://content.openalex.org/works/W2145092134.pdf","grobid_xml":"https://content.openalex.org/works/W2145092134.grobid-xml"},"referenced_works_count":23,"referenced_works":["https://openalex.org/W42392872","https://openalex.org/W169477153","https://openalex.org/W1490954610","https://openalex.org/W1514583064","https://openalex.org/W1537809898","https://openalex.org/W1569963244","https://openalex.org/W1761227569","https://openalex.org/W1985658808","https://openalex.org/W2000906577","https://openalex.org/W2041367235","https://openalex.org/W2043700026","https://openalex.org/W2099587183","https://openalex.org/W2119567691","https://openalex.org/W2123859855","https://openalex.org/W2125922627","https://openalex.org/W2132083787","https://openalex.org/W2132713246","https://openalex.org/W2334782222","https://openalex.org/W4285719527","https://openalex.org/W6601709131","https://openalex.org/W6634246166","https://openalex.org/W6637945311","https://openalex.org/W6675434749"],"related_works":["https://openalex.org/W3181676408","https://openalex.org/W2112176619","https://openalex.org/W1549959306","https://openalex.org/W320292658","https://openalex.org/W2212764924","https://openalex.org/W98006832","https://openalex.org/W2186138942","https://openalex.org/W2806326686","https://openalex.org/W1993907735","https://openalex.org/W2001007279"],"abstract_inverted_index":{"By":[0],"defining":[1],"a":[2,6,26,42,50],"video-game":[3],"environment":[4],"as":[5,62],"collection":[7],"of":[8,28,44,47,60],"objects,":[9],"relations,":[10],"actions":[11],"and":[12,24,49,74],"rewards,":[13],"the":[14,58,63,71,80],"relational":[15,31],"reinforcement":[16],"learning":[17,38,61],"algorithm":[18,67],"presented":[19],"in":[20],"this":[21],"paper":[22],"generates":[23],"optimises":[25],"set":[27],"concise,":[29],"human-readable":[30],"rules":[32,48],"for":[33,86],"achieving":[34],"maximal":[35],"reward.":[36],"Rule":[37],"is":[39,68],"achieved":[40],"using":[41],"combination":[43],"incremental":[45],"specialisation":[46],"modified":[51],"online":[52],"cross-entropy":[53],"method,":[54],"which":[55],"dynamically":[56],"adjusts":[57],"rate":[59],"agent":[64,81],"progresses.":[65],"The":[66],"tested":[69],"on":[70],"Ms.":[72],"Pac-Man":[73],"Mario":[75],"environments,":[76],"with":[77],"results":[78],"indicating":[79],"learns":[82],"an":[83],"effective":[84],"policy":[85],"acting":[87],"within":[88],"each":[89],"environment.":[90]},"counts_by_year":[{"year":2018,"cited_by_count":1},{"year":2016,"cited_by_count":1},{"year":2015,"cited_by_count":2}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
