{"id":"https://openalex.org/W2580475959","doi":"https://doi.org/10.1109/tciaig.2017.2658659","title":"Learning Options From Demonstrations: A &lt;italic&gt;Pac-Man&lt;/italic&gt; Case Study","display_name":"Learning Options From Demonstrations: A &lt;italic&gt;Pac-Man&lt;/italic&gt; Case Study","publication_year":2017,"publication_date":"2017-01-25","ids":{"openalex":"https://openalex.org/W2580475959","doi":"https://doi.org/10.1109/tciaig.2017.2658659","mag":"2580475959"},"language":"en","primary_location":{"id":"doi:10.1109/tciaig.2017.2658659","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tciaig.2017.2658659","pdf_url":null,"source":{"id":"https://openalex.org/S4210224842","display_name":"IEEE Transactions on Games","issn_l":"2475-1502","issn":["2475-1502","2475-1510"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Games","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5016550498","display_name":"Marco Tamassia","orcid":"https://orcid.org/0000-0001-6567-1380"},"institutions":[{"id":"https://openalex.org/I82951845","display_name":"RMIT University","ror":"https://ror.org/04ttjf776","country_code":"AU","type":"education","lineage":["https://openalex.org/I82951845"]}],"countries":["AU"],"is_corresponding":true,"raw_author_name":"Marco Tamassia","raw_affiliation_strings":["RMIT University, Melbourne, Australia"],"raw_orcid":"https://orcid.org/0000-0001-6567-1380","affiliations":[{"raw_affiliation_string":"RMIT University, Melbourne, Australia","institution_ids":["https://openalex.org/I82951845"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5061528333","display_name":"Fabio Zambetta","orcid":"https://orcid.org/0000-0003-4133-7913"},"institutions":[{"id":"https://openalex.org/I82951845","display_name":"RMIT University","ror":"https://ror.org/04ttjf776","country_code":"AU","type":"education","lineage":["https://openalex.org/I82951845"]}],"countries":["AU"],"is_corresponding":false,"raw_author_name":"Fabio Zambetta","raw_affiliation_strings":["RMIT University, Melbourne, Australia"],"raw_orcid":"https://orcid.org/0000-0003-4133-7913","affiliations":[{"raw_affiliation_string":"RMIT University, Melbourne, Australia","institution_ids":["https://openalex.org/I82951845"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5075906421","display_name":"William Raffe","orcid":"https://orcid.org/0000-0001-5310-0943"},"institutions":[{"id":"https://openalex.org/I82951845","display_name":"RMIT University","ror":"https://ror.org/04ttjf776","country_code":"AU","type":"education","lineage":["https://openalex.org/I82951845"]}],"countries":["AU"],"is_corresponding":false,"raw_author_name":"William L. Raffe","raw_affiliation_strings":["RMIT University, Melbourne, Australia"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"RMIT University, Melbourne, Australia","institution_ids":["https://openalex.org/I82951845"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5071394525","display_name":"Florian Mueller","orcid":"https://orcid.org/0000-0001-6472-3476"},"institutions":[{"id":"https://openalex.org/I82951845","display_name":"RMIT University","ror":"https://ror.org/04ttjf776","country_code":"AU","type":"education","lineage":["https://openalex.org/I82951845"]}],"countries":["AU"],"is_corresponding":false,"raw_author_name":"Florian Mueller","raw_affiliation_strings":["RMIT University, Melbourne, Australia"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"RMIT University, Melbourne, Australia","institution_ids":["https://openalex.org/I82951845"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5100369719","display_name":"Xiaodong Li","orcid":"https://orcid.org/0000-0003-0346-1526"},"institutions":[{"id":"https://openalex.org/I82951845","display_name":"RMIT University","ror":"https://ror.org/04ttjf776","country_code":"AU","type":"education","lineage":["https://openalex.org/I82951845"]}],"countries":["AU"],"is_corresponding":false,"raw_author_name":"Xiaodong Li","raw_affiliation_strings":["RMIT University, Melbourne, Australia"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"RMIT University, Melbourne, Australia","institution_ids":["https://openalex.org/I82951845"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5016550498"],"corresponding_institution_ids":["https://openalex.org/I82951845"],"apc_list":null,"apc_paid":null,"fwci":0.6236,"has_fulltext":false,"cited_by_count":7,"citation_normalized_percentile":{"value":0.75830347,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":95},"biblio":{"volume":"10","issue":"1","first_page":"91","last_page":"96"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9995999932289124,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9995999932289124,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11574","display_name":"Artificial Intelligence in Games","score":0.9980000257492065,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12101","display_name":"Advanced Bandit Algorithms Research","score":0.9878000020980835,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.7938581109046936},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7227538824081421},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.6933310031890869},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.561877429485321},{"id":"https://openalex.org/keywords/duration","display_name":"Duration (music)","score":0.4506179094314575},{"id":"https://openalex.org/keywords/robotics","display_name":"Robotics","score":0.43209171295166016},{"id":"https://openalex.org/keywords/control","display_name":"Control (management)","score":0.42615365982055664},{"id":"https://openalex.org/keywords/robot","display_name":"Robot","score":0.16506263613700867}],"concepts":[{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.7938581109046936},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7227538824081421},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6933310031890869},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.561877429485321},{"id":"https://openalex.org/C112758219","wikidata":"https://www.wikidata.org/wiki/Q16038819","display_name":"Duration (music)","level":2,"score":0.4506179094314575},{"id":"https://openalex.org/C34413123","wikidata":"https://www.wikidata.org/wiki/Q170978","display_name":"Robotics","level":3,"score":0.43209171295166016},{"id":"https://openalex.org/C2775924081","wikidata":"https://www.wikidata.org/wiki/Q55608371","display_name":"Control (management)","level":2,"score":0.42615365982055664},{"id":"https://openalex.org/C90509273","wikidata":"https://www.wikidata.org/wiki/Q11012","display_name":"Robot","level":2,"score":0.16506263613700867},{"id":"https://openalex.org/C124952713","wikidata":"https://www.wikidata.org/wiki/Q8242","display_name":"Literature","level":1,"score":0.0},{"id":"https://openalex.org/C142362112","wikidata":"https://www.wikidata.org/wiki/Q735","display_name":"Art","level":0,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1109/tciaig.2017.2658659","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tciaig.2017.2658659","pdf_url":null,"source":{"id":"https://openalex.org/S4210224842","display_name":"IEEE Transactions on Games","issn_l":"2475-1502","issn":["2475-1502","2475-1510"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Games","raw_type":"journal-article"},{"id":"pmh:oai:alma.61RMIT_INST:11247264210001341","is_oa":false,"landing_page_url":"https://doi.org/10.1109/TCIAIG.2017.2658659","pdf_url":null,"source":{"id":"https://openalex.org/S4306402074","display_name":"RMIT Research Repository (RMIT University Library)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I82951845","host_organization_name":"RMIT University","host_organization_lineage":["https://openalex.org/I82951845"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"text"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[{"id":"https://openalex.org/F4320320972","display_name":"RMIT University","ror":"https://ror.org/04ttjf776"}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":40,"referenced_works":["https://openalex.org/W196004314","https://openalex.org/W1515851193","https://openalex.org/W1536990779","https://openalex.org/W1556824961","https://openalex.org/W1586944634","https://openalex.org/W1601389419","https://openalex.org/W1777239053","https://openalex.org/W1949804828","https://openalex.org/W1968768508","https://openalex.org/W1980054804","https://openalex.org/W2090170171","https://openalex.org/W2101786389","https://openalex.org/W2106216496","https://openalex.org/W2109910161","https://openalex.org/W2111020392","https://openalex.org/W2122480991","https://openalex.org/W2138289776","https://openalex.org/W2143435603","https://openalex.org/W2146292423","https://openalex.org/W2155791599","https://openalex.org/W2160808139","https://openalex.org/W2161252410","https://openalex.org/W2172131460","https://openalex.org/W2563702229","https://openalex.org/W2906926620","https://openalex.org/W3010488986","https://openalex.org/W3011120880","https://openalex.org/W4211089519","https://openalex.org/W4246219036","https://openalex.org/W6635131279","https://openalex.org/W6638088447","https://openalex.org/W6677116005","https://openalex.org/W6680506192","https://openalex.org/W6680976742","https://openalex.org/W6682789603","https://openalex.org/W6683164292","https://openalex.org/W6684863604","https://openalex.org/W6730975572","https://openalex.org/W6758076146","https://openalex.org/W6775686901"],"related_works":["https://openalex.org/W2348837382","https://openalex.org/W746329893","https://openalex.org/W4205872570","https://openalex.org/W4245971243","https://openalex.org/W1922805944","https://openalex.org/W4253588120","https://openalex.org/W2383732295","https://openalex.org/W4248716494","https://openalex.org/W2942717012","https://openalex.org/W2162226802"],"abstract_inverted_index":{"Reinforcement":[0],"learning":[1,6,25],"(RL)":[2],"is":[3],"a":[4,24,54,123],"machine":[5],"paradigm":[7],"behind":[8],"many":[9],"successes":[10],"in":[11,105],"games,":[12],"robotics,":[13],"and":[14,47,71,86],"control":[15],"applications.":[16],"RL":[17],"agents":[18],"improve":[19,75,118],"through":[20],"trial-and-error,":[21],"therefore":[22],"undergoing":[23],"phase":[26],"during":[27,40],"which":[28],"they":[29],"perform":[30],"suboptimally.":[31],"Research":[32],"effort":[33],"has":[34],"been":[35],"put":[36],"into":[37],"optimizing":[38],"behavior":[39],"this":[41],"period,":[42],"to":[43,48,74,98,122],"reduce":[44],"its":[45],"duration":[46],"maximize":[49],"after-learning":[50],"performance.":[51,76],"We":[52],"introduce":[53],"novel":[55],"algorithm":[56,78],"that":[57,114],"extracts":[58],"useful":[59],"information":[60],"from":[61],"expert":[62,85,91],"demonstrations":[63],"(traces":[64],"of":[65],"interactions":[66],"with":[67],"the":[68,84,90,106],"target":[69],"environment)":[70],"uses":[72],"it":[73],"The":[77],"detects":[79],"unexpected":[80],"decisions":[81,100],"made":[82],"by":[83],"infers":[87],"what":[88],"goal":[89],"was":[92],"pursuing.":[93],"Goals":[94],"are":[95],"then":[96],"used":[97],"bias":[99],"while":[101],"learning.":[102],"Our":[103],"experiments":[104],"video":[107],"game":[108],"Pac-Man":[109],"provide":[110],"statistically":[111],"significant":[112],"evidence":[113],"our":[115],"method":[116],"can":[117],"final":[119],"performance":[120],"compared":[121],"state-of-the-art":[124],"approach.":[125]},"counts_by_year":[{"year":2025,"cited_by_count":1},{"year":2024,"cited_by_count":1},{"year":2022,"cited_by_count":1},{"year":2021,"cited_by_count":1},{"year":2020,"cited_by_count":1},{"year":2019,"cited_by_count":1},{"year":2018,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2017-02-03T00:00:00"}
