{"id":"https://openalex.org/W2958051327","doi":"https://doi.org/10.1017/s0269888919000031","title":"Introspective <i>Q</i> -learning and learning from demonstration","display_name":"Introspective <i>Q</i> -learning and learning from demonstration","publication_year":2019,"publication_date":"2019-01-01","ids":{"openalex":"https://openalex.org/W2958051327","doi":"https://doi.org/10.1017/s0269888919000031","mag":"2958051327"},"language":"en","primary_location":{"id":"doi:10.1017/s0269888919000031","is_oa":false,"landing_page_url":"https://doi.org/10.1017/s0269888919000031","pdf_url":null,"source":{"id":"https://openalex.org/S137506714","display_name":"The Knowledge Engineering Review","issn_l":"0269-8889","issn":["0269-8889","1469-8005"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310311721","host_organization_name":"Cambridge University Press","host_organization_lineage":["https://openalex.org/P4310311721","https://openalex.org/P4310311702"],"host_organization_lineage_names":["Cambridge University Press","University of Cambridge"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"The Knowledge Engineering Review","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5100355195","display_name":"Mao Li","orcid":"https://orcid.org/0000-0001-8500-1165"},"institutions":[{"id":"https://openalex.org/I52099693","display_name":"University of York","ror":"https://ror.org/04m01e293","country_code":"GB","type":"education","lineage":["https://openalex.org/I52099693"]}],"countries":["GB"],"is_corresponding":true,"raw_author_name":"Mao Li","raw_affiliation_strings":["Computer Science Department, University of York, Deramore Ln, Heslington, York YO10 5GH, United Kingdom; e-mail:","Computer Science Department, University of York, Deramore Ln, Heslington, York YO10 5GH, United Kingdom"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Computer Science Department, University of York, Deramore Ln, Heslington, York YO10 5GH, United Kingdom; e-mail:","institution_ids":["https://openalex.org/I52099693"]},{"raw_affiliation_string":"Computer Science Department, University of York, Deramore Ln, Heslington, York YO10 5GH, United Kingdom","institution_ids":["https://openalex.org/I52099693"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5084002137","display_name":"Tim Brys","orcid":null},"institutions":[{"id":"https://openalex.org/I13469542","display_name":"Vrije Universiteit Brussel","ror":"https://ror.org/006e5kg04","country_code":"BE","type":"education","lineage":["https://openalex.org/I13469542"]}],"countries":["BE"],"is_corresponding":false,"raw_author_name":"Tim Brys","raw_affiliation_strings":["Computer Science Department, Vrije Universiteit Brussel, Artificial Intelligence Lab Pleinlaan 9, 3th floor 1050, Brussels, Belgium; e-mail:","Computer Science Department, Vrije Universiteit Brussel, Artificial Intelligence Lab Pleinlaan 9, 3th floor 1050, Brussels, Belgium"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Computer Science Department, Vrije Universiteit Brussel, Artificial Intelligence Lab Pleinlaan 9, 3th floor 1050, Brussels, Belgium; e-mail:","institution_ids":["https://openalex.org/I13469542"]},{"raw_affiliation_string":"Computer Science Department, Vrije Universiteit Brussel, Artificial Intelligence Lab Pleinlaan 9, 3th floor 1050, Brussels, Belgium","institution_ids":["https://openalex.org/I13469542"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5009587907","display_name":"Daniel Kudenko\u22c6","orcid":"https://orcid.org/0000-0003-3359-3255"},"institutions":[{"id":"https://openalex.org/I4210105947","display_name":"Institute of the Human Brain","ror":"https://ror.org/01ska0903","country_code":"RU","type":"facility","lineage":["https://openalex.org/I1313323035","https://openalex.org/I4210105947","https://openalex.org/I4210115279"]},{"id":"https://openalex.org/I52099693","display_name":"University of York","ror":"https://ror.org/04m01e293","country_code":"GB","type":"education","lineage":["https://openalex.org/I52099693"]}],"countries":["GB","RU"],"is_corresponding":false,"raw_author_name":"Daniel Kudenko","raw_affiliation_strings":["Computer Science Department, University of York, Deramore Ln, Heslington, York YO10 5GH, United Kingdom; e-mail:","Jet Brains Research, St Petersburg, Russia; e-mail:","Jet Brains Research, St Petersburg, Russia","Computer Science Department, University of York, Deramore Ln, Heslington, York YO10 5GH, United Kingdom"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Computer Science Department, University of York, Deramore Ln, Heslington, York YO10 5GH, United Kingdom; e-mail:","institution_ids":["https://openalex.org/I52099693"]},{"raw_affiliation_string":"Jet Brains Research, St Petersburg, Russia; e-mail:","institution_ids":["https://openalex.org/I4210105947"]},{"raw_affiliation_string":"Jet Brains Research, St Petersburg, Russia","institution_ids":[]},{"raw_affiliation_string":"Computer Science Department, University of York, Deramore Ln, Heslington, York YO10 5GH, United Kingdom","institution_ids":["https://openalex.org/I52099693"]}]}],"institutions":[],"countries_distinct_count":3,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5100355195"],"corresponding_institution_ids":["https://openalex.org/I52099693"],"apc_list":null,"apc_paid":null,"fwci":0.2902,"has_fulltext":false,"cited_by_count":2,"citation_normalized_percentile":{"value":0.6609187,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":94,"max":95},"biblio":{"volume":"34","issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10653","display_name":"Robot Manipulation and Learning","score":0.9715999960899353,"subfield":{"id":"https://openalex.org/subfields/2207","display_name":"Control and Systems Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11975","display_name":"Evolutionary Algorithms and Applications","score":0.9710000157356262,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.8888548612594604},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7880132794380188},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.7086631059646606},{"id":"https://openalex.org/keywords/introspection","display_name":"Introspection","score":0.6852896213531494},{"id":"https://openalex.org/keywords/queue","display_name":"Queue","score":0.6533094644546509},{"id":"https://openalex.org/keywords/domain","display_name":"Domain (mathematical analysis)","score":0.5535269975662231},{"id":"https://openalex.org/keywords/q-learning","display_name":"Q-learning","score":0.5507331490516663},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.5439943075180054},{"id":"https://openalex.org/keywords/function","display_name":"Function (biology)","score":0.48540958762168884},{"id":"https://openalex.org/keywords/process","display_name":"Process (computing)","score":0.4406549632549286},{"id":"https://openalex.org/keywords/cognitive-psychology","display_name":"Cognitive psychology","score":0.08158490061759949},{"id":"https://openalex.org/keywords/psychology","display_name":"Psychology","score":0.07223758101463318},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.0650479793548584}],"concepts":[{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.8888548612594604},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7880132794380188},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.7086631059646606},{"id":"https://openalex.org/C129671850","wikidata":"https://www.wikidata.org/wiki/Q210501","display_name":"Introspection","level":2,"score":0.6852896213531494},{"id":"https://openalex.org/C160403385","wikidata":"https://www.wikidata.org/wiki/Q220543","display_name":"Queue","level":2,"score":0.6533094644546509},{"id":"https://openalex.org/C36503486","wikidata":"https://www.wikidata.org/wiki/Q11235244","display_name":"Domain (mathematical analysis)","level":2,"score":0.5535269975662231},{"id":"https://openalex.org/C188116033","wikidata":"https://www.wikidata.org/wiki/Q2664563","display_name":"Q-learning","level":3,"score":0.5507331490516663},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.5439943075180054},{"id":"https://openalex.org/C14036430","wikidata":"https://www.wikidata.org/wiki/Q3736076","display_name":"Function (biology)","level":2,"score":0.48540958762168884},{"id":"https://openalex.org/C98045186","wikidata":"https://www.wikidata.org/wiki/Q205663","display_name":"Process (computing)","level":2,"score":0.4406549632549286},{"id":"https://openalex.org/C180747234","wikidata":"https://www.wikidata.org/wiki/Q23373","display_name":"Cognitive psychology","level":1,"score":0.08158490061759949},{"id":"https://openalex.org/C15744967","wikidata":"https://www.wikidata.org/wiki/Q9418","display_name":"Psychology","level":0,"score":0.07223758101463318},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.0650479793548584},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0},{"id":"https://openalex.org/C78458016","wikidata":"https://www.wikidata.org/wiki/Q840400","display_name":"Evolutionary biology","level":1,"score":0.0},{"id":"https://openalex.org/C134306372","wikidata":"https://www.wikidata.org/wiki/Q7754","display_name":"Mathematical analysis","level":1,"score":0.0},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1017/s0269888919000031","is_oa":false,"landing_page_url":"https://doi.org/10.1017/s0269888919000031","pdf_url":null,"source":{"id":"https://openalex.org/S137506714","display_name":"The Knowledge Engineering Review","issn_l":"0269-8889","issn":["0269-8889","1469-8005"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310311721","host_organization_name":"Cambridge University Press","host_organization_lineage":["https://openalex.org/P4310311721","https://openalex.org/P4310311702"],"host_organization_lineage_names":["Cambridge University Press","University of Cambridge"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"The Knowledge Engineering Review","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[{"display_name":"No poverty","score":0.5799999833106995,"id":"https://metadata.un.org/sdg/1"}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":35,"referenced_works":["https://openalex.org/W46130386","https://openalex.org/W1777239053","https://openalex.org/W1986014385","https://openalex.org/W2061562262","https://openalex.org/W2097381042","https://openalex.org/W2103285838","https://openalex.org/W2113913482","https://openalex.org/W2115668428","https://openalex.org/W2121863487","https://openalex.org/W2137375617","https://openalex.org/W2147750403","https://openalex.org/W2148112459","https://openalex.org/W2151382427","https://openalex.org/W2164419340","https://openalex.org/W2201581102","https://openalex.org/W2202549229","https://openalex.org/W2257979135","https://openalex.org/W2397581010","https://openalex.org/W2419612459","https://openalex.org/W2474718355","https://openalex.org/W2491675558","https://openalex.org/W2539402368","https://openalex.org/W2581945288","https://openalex.org/W2614839826","https://openalex.org/W2912571273","https://openalex.org/W2963523627","https://openalex.org/W3011120880","https://openalex.org/W3147079603","https://openalex.org/W4214717370","https://openalex.org/W4233696721","https://openalex.org/W6601865881","https://openalex.org/W6638088447","https://openalex.org/W6674600207","https://openalex.org/W6675936268","https://openalex.org/W6687321610"],"related_works":["https://openalex.org/W3011591403","https://openalex.org/W3022038857","https://openalex.org/W2907922678","https://openalex.org/W2368899903","https://openalex.org/W2327584824","https://openalex.org/W4220782901","https://openalex.org/W1963934847","https://openalex.org/W4289712363","https://openalex.org/W3119565375","https://openalex.org/W2958051327"],"abstract_inverted_index":{"Abstract":[0],"One":[1],"challenge":[2],"faced":[3],"by":[4,44],"reinforcement":[5,60],"learning":[6,29,52,61,107,161],"(RL)":[7],"agents":[8],"is":[9,17,135],"that":[10,89,179],"in":[11,27,108,124,166,189,191],"many":[12],"environments":[13],"the":[14,24,38,51,94,125,156,160,167,172],"reward":[15,32,43,145],"signal":[16],"sparse,":[18],"leading":[19],"to":[20,36,71,116,140,154],"slow":[21],"improvement":[22],"of":[23,41],"agent\u2019s":[25],"performance":[26],"early":[28],"episodes.":[30],"Potential-based":[31],"shaping":[33],"can":[34,76,151],"help":[35],"resolve":[37],"aforementioned":[39],"issue":[40],"sparse":[42],"incorporating":[45],"an":[46,85],"expert\u2019s":[47,149],"domain":[48,170,177],"knowledge":[49],"into":[50],"through":[53],"a":[54,72,109,117],"potential":[55,73],"function.":[56],"Past":[57],"work":[58],"on":[59],"from":[62],"demonstration":[63,70,139,150],"(RLfD)":[64],"directly":[65],"mapped":[66],"(sub-optimal)":[67],"human":[68,148],"expert":[69],"function,":[74],"which":[75],"speed":[77,141],"up":[78,93,142],"RL.":[79],"In":[80],"this":[81],"paper":[82],"we":[83],"propose":[84],"introspective":[86,97],"RL":[87,98,143,185],"agent":[88,99],"significantly":[90,182],"further":[91],"speeds":[92],"learning.":[95],"An":[96],"records":[100],"its":[101],"state\u2013action":[102],"decisions":[103,129],"and":[104,171,186],"experience":[105],"during":[106],"priority":[110,157],"queue.":[111],"Good":[112],"quality":[113],"decisions,":[114],"according":[115],"Monte":[118],"Carlo":[119],"estimation,":[120],"will":[121,130],"be":[122,131,152],"kept":[123],"queue,":[126],"while":[127],"poorer":[128],"rejected.":[132],"The":[133],"queue":[134,158],"then":[136],"used":[137,153],"as":[138],"via":[144],"shaping.":[146],"A":[147],"initialize":[155],"before":[159],"process":[162],"starts.":[163],"Experimental":[164],"validation":[165],"4-dimensional":[168],"CartPole":[169],"27-dimensional":[173],"Super":[174],"Mario":[175],"AI":[176],"shows":[178],"our":[180],"approach":[181],"outperforms":[183],"non-introspective":[184],"state-of-the-art":[187],"approaches":[188],"RLfD":[190],"both":[192],"domains.":[193]},"counts_by_year":[{"year":2021,"cited_by_count":2}],"updated_date":"2026-05-21T09:19:25.381259","created_date":"2025-10-10T00:00:00"}
