{"id":"https://openalex.org/W2792217087","doi":"https://doi.org/10.1080/09540091.2018.1443318","title":"Improving interactive reinforcement learning: What makes a good teacher?","display_name":"Improving interactive reinforcement learning: What makes a good teacher?","publication_year":2018,"publication_date":"2018-03-01","ids":{"openalex":"https://openalex.org/W2792217087","doi":"https://doi.org/10.1080/09540091.2018.1443318","mag":"2792217087"},"language":"en","primary_location":{"id":"doi:10.1080/09540091.2018.1443318","is_oa":true,"landing_page_url":"https://doi.org/10.1080/09540091.2018.1443318","pdf_url":"https://www.tandfonline.com/doi/pdf/10.1080/09540091.2018.1443318?needAccess=true","source":{"id":"https://openalex.org/S4210188800","display_name":"Connection Science","issn_l":"0954-0091","issn":["0954-0091","1360-0494"],"is_oa":false,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310320547","host_organization_name":"Taylor & Francis","host_organization_lineage":["https://openalex.org/P4310320547"],"host_organization_lineage_names":["Taylor & Francis"],"type":"journal"},"license":"cc-by-nc-nd","license_id":"https://openalex.org/licenses/cc-by-nc-nd","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Connection Science","raw_type":"journal-article"},"type":"article","indexed_in":["arxiv","crossref","doaj"],"open_access":{"is_oa":true,"oa_status":"hybrid","oa_url":"https://www.tandfonline.com/doi/pdf/10.1080/09540091.2018.1443318?needAccess=true","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5044098673","display_name":"Francisco Cruz","orcid":"https://orcid.org/0000-0002-1131-3382"},"institutions":[{"id":"https://openalex.org/I159176309","display_name":"Universit\u00e4t Hamburg","ror":"https://ror.org/00g30e956","country_code":"DE","type":"education","lineage":["https://openalex.org/I159176309"]},{"id":"https://openalex.org/I4210156023","display_name":"Universidad Central de Chile","ror":"https://ror.org/0577avk88","country_code":"CL","type":"education","lineage":["https://openalex.org/I4210156023"]},{"id":"https://openalex.org/I884043246","display_name":"Hamburg University of Technology","ror":"https://ror.org/04bs1pb34","country_code":"DE","type":"education","lineage":["https://openalex.org/I884043246"]}],"countries":["CL","DE"],"is_corresponding":true,"raw_author_name":"Francisco Cruz","raw_affiliation_strings":["Department of Informatics, Knowledge Technology Group, University of Hamburg, Hamburg, Germany","Escuela de Computaci\u00f3n e Inform\u00e1tica, Facultad de Ingenier\u00eda, Universidad Central de Chile, Chile"],"raw_orcid":"https://orcid.org/0000-0002-1131-3382","affiliations":[{"raw_affiliation_string":"Department of Informatics, Knowledge Technology Group, University of Hamburg, Hamburg, Germany","institution_ids":["https://openalex.org/I159176309","https://openalex.org/I884043246"]},{"raw_affiliation_string":"Escuela de Computaci\u00f3n e Inform\u00e1tica, Facultad de Ingenier\u00eda, Universidad Central de Chile, Chile","institution_ids":["https://openalex.org/I4210156023"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5015567592","display_name":"Sven Magg","orcid":"https://orcid.org/0000-0002-0589-6585"},"institutions":[{"id":"https://openalex.org/I159176309","display_name":"Universit\u00e4t Hamburg","ror":"https://ror.org/00g30e956","country_code":"DE","type":"education","lineage":["https://openalex.org/I159176309"]},{"id":"https://openalex.org/I884043246","display_name":"Hamburg University of Technology","ror":"https://ror.org/04bs1pb34","country_code":"DE","type":"education","lineage":["https://openalex.org/I884043246"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Sven Magg","raw_affiliation_strings":["Department of Informatics, Knowledge Technology Group, University of Hamburg, Hamburg, Germany"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Department of Informatics, Knowledge Technology Group, University of Hamburg, Hamburg, Germany","institution_ids":["https://openalex.org/I159176309","https://openalex.org/I884043246"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5067742588","display_name":"Yukie Nagai","orcid":"https://orcid.org/0000-0003-4794-0940"},"institutions":[{"id":"https://openalex.org/I98285908","display_name":"The University of Osaka","ror":"https://ror.org/035t8zc32","country_code":"JP","type":"education","lineage":["https://openalex.org/I98285908"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Yukie Nagai","raw_affiliation_strings":["Emergent Robotics Laboratory, Graduate School of Engineering, Osaka University, Osaka, Japan"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Emergent Robotics Laboratory, Graduate School of Engineering, Osaka University, Osaka, Japan","institution_ids":["https://openalex.org/I98285908"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5033486668","display_name":"Stefan Wermter","orcid":"https://orcid.org/0000-0003-1343-4775"},"institutions":[{"id":"https://openalex.org/I159176309","display_name":"Universit\u00e4t Hamburg","ror":"https://ror.org/00g30e956","country_code":"DE","type":"education","lineage":["https://openalex.org/I159176309"]},{"id":"https://openalex.org/I884043246","display_name":"Hamburg University of Technology","ror":"https://ror.org/04bs1pb34","country_code":"DE","type":"education","lineage":["https://openalex.org/I884043246"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Stefan Wermter","raw_affiliation_strings":["Department of Informatics, Knowledge Technology Group, University of Hamburg, Hamburg, Germany"],"raw_orcid":"https://orcid.org/0000-0003-1343-4775","affiliations":[{"raw_affiliation_string":"Department of Informatics, Knowledge Technology Group, University of Hamburg, Hamburg, Germany","institution_ids":["https://openalex.org/I159176309","https://openalex.org/I884043246"]}]}],"institutions":[],"countries_distinct_count":3,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5044098673"],"corresponding_institution_ids":["https://openalex.org/I159176309","https://openalex.org/I4210156023","https://openalex.org/I884043246"],"apc_list":{"value":1270,"currency":"USD","value_usd":1270},"apc_paid":{"value":1270,"currency":"USD","value_usd":1270},"fwci":5.075,"has_fulltext":false,"cited_by_count":45,"citation_normalized_percentile":{"value":0.96156137,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":90,"max":100},"biblio":{"volume":"30","issue":"3","first_page":"306","last_page":"325"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9962000250816345,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9962000250816345,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10646","display_name":"Experimental Behavioral Economics Studies","score":0.9961000084877014,"subfield":{"id":"https://openalex.org/subfields/3311","display_name":"Safety Research"},"field":{"id":"https://openalex.org/fields/33","display_name":"Social Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T11182","display_name":"Auction Theory and Applications","score":0.9794999957084656,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.7872821092605591},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7410650849342346},{"id":"https://openalex.org/keywords/trainer","display_name":"Trainer","score":0.6578565835952759},{"id":"https://openalex.org/keywords/reinforcement","display_name":"Reinforcement","score":0.5576874613761902},{"id":"https://openalex.org/keywords/process","display_name":"Process (computing)","score":0.5140472650527954},{"id":"https://openalex.org/keywords/apprenticeship","display_name":"Apprenticeship","score":0.4952196180820465},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.49297070503234863},{"id":"https://openalex.org/keywords/consistency","display_name":"Consistency (knowledge bases)","score":0.4461980164051056},{"id":"https://openalex.org/keywords/human\u2013computer-interaction","display_name":"Human\u2013computer interaction","score":0.32827430963516235},{"id":"https://openalex.org/keywords/psychology","display_name":"Psychology","score":0.18062591552734375},{"id":"https://openalex.org/keywords/social-psychology","display_name":"Social psychology","score":0.12987706065177917}],"concepts":[{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.7872821092605591},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7410650849342346},{"id":"https://openalex.org/C2780463512","wikidata":"https://www.wikidata.org/wiki/Q15122700","display_name":"Trainer","level":2,"score":0.6578565835952759},{"id":"https://openalex.org/C67203356","wikidata":"https://www.wikidata.org/wiki/Q1321905","display_name":"Reinforcement","level":2,"score":0.5576874613761902},{"id":"https://openalex.org/C98045186","wikidata":"https://www.wikidata.org/wiki/Q205663","display_name":"Process (computing)","level":2,"score":0.5140472650527954},{"id":"https://openalex.org/C107806365","wikidata":"https://www.wikidata.org/wiki/Q253567","display_name":"Apprenticeship","level":2,"score":0.4952196180820465},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.49297070503234863},{"id":"https://openalex.org/C2776436953","wikidata":"https://www.wikidata.org/wiki/Q5163215","display_name":"Consistency (knowledge bases)","level":2,"score":0.4461980164051056},{"id":"https://openalex.org/C107457646","wikidata":"https://www.wikidata.org/wiki/Q207434","display_name":"Human\u2013computer interaction","level":1,"score":0.32827430963516235},{"id":"https://openalex.org/C15744967","wikidata":"https://www.wikidata.org/wiki/Q9418","display_name":"Psychology","level":0,"score":0.18062591552734375},{"id":"https://openalex.org/C77805123","wikidata":"https://www.wikidata.org/wiki/Q161272","display_name":"Social psychology","level":1,"score":0.12987706065177917},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.0}],"mesh":[],"locations_count":6,"locations":[{"id":"doi:10.1080/09540091.2018.1443318","is_oa":true,"landing_page_url":"https://doi.org/10.1080/09540091.2018.1443318","pdf_url":"https://www.tandfonline.com/doi/pdf/10.1080/09540091.2018.1443318?needAccess=true","source":{"id":"https://openalex.org/S4210188800","display_name":"Connection Science","issn_l":"0954-0091","issn":["0954-0091","1360-0494"],"is_oa":false,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310320547","host_organization_name":"Taylor & Francis","host_organization_lineage":["https://openalex.org/P4310320547"],"host_organization_lineage_names":["Taylor & Francis"],"type":"journal"},"license":"cc-by-nc-nd","license_id":"https://openalex.org/licenses/cc-by-nc-nd","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Connection Science","raw_type":"journal-article"},{"id":"pmh:oai:arXiv.org:1904.06879","is_oa":true,"landing_page_url":"http://arxiv.org/abs/1904.06879","pdf_url":"https://arxiv.org/pdf/1904.06879","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},{"id":"pmh:oai:sedici.unlp.edu.ar:10915/70699","is_oa":true,"landing_page_url":"http://sedici.unlp.edu.ar/handle/10915/70699","pdf_url":null,"source":{"id":"https://openalex.org/S4306400803","display_name":"El Servicio de Difusi\u00f3n de la Creaci\u00f3n Intelectual (National University of La Plata)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I874386039","host_organization_name":"Universidad Nacional de La Plata","host_organization_lineage":["https://openalex.org/I874386039"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by-sa","license_id":"https://openalex.org/licenses/cc-by-sa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Resumen"},{"id":"pmh:oai:doaj.org/article:835776a4bef2442ea8cb9ab5122fdba8","is_oa":false,"landing_page_url":"https://doaj.org/article/835776a4bef2442ea8cb9ab5122fdba8","pdf_url":null,"source":{"id":"https://openalex.org/S4306401280","display_name":"DOAJ (DOAJ: Directory of Open Access Journals)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Connection Science, Vol 30, Iss 3, Pp 306-325 (2018)","raw_type":"article"},{"id":"pmh:oai:dro.deakin.edu.au:DU:30122604","is_oa":false,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S4306401102","display_name":"Own your potential (DEAKIN)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I149704539","host_organization_name":"Deakin University","host_organization_lineage":["https://openalex.org/I149704539"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"Journal Article"},{"id":"pmh:oai:figshare.com:article/20755789","is_oa":true,"landing_page_url":"https://figshare.com/articles/journal_contribution/Improving_interactive_reinforcement_learning_what_makes_a_good_teacher_/20755789","pdf_url":null,"source":{"id":"https://openalex.org/S4377196282","display_name":"Figshare","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I4210132348","host_organization_name":"Figshare (United Kingdom)","host_organization_lineage":["https://openalex.org/I4210132348"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"Text"}],"best_oa_location":{"id":"doi:10.1080/09540091.2018.1443318","is_oa":true,"landing_page_url":"https://doi.org/10.1080/09540091.2018.1443318","pdf_url":"https://www.tandfonline.com/doi/pdf/10.1080/09540091.2018.1443318?needAccess=true","source":{"id":"https://openalex.org/S4210188800","display_name":"Connection Science","issn_l":"0954-0091","issn":["0954-0091","1360-0494"],"is_oa":false,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310320547","host_organization_name":"Taylor & Francis","host_organization_lineage":["https://openalex.org/P4310320547"],"host_organization_lineage_names":["Taylor & Francis"],"type":"journal"},"license":"cc-by-nc-nd","license_id":"https://openalex.org/licenses/cc-by-nc-nd","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Connection Science","raw_type":"journal-article"},"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/4","display_name":"Quality Education","score":0.5899999737739563}],"awards":[{"id":"https://openalex.org/G6276684567","display_name":null,"funder_award_id":"TRR 169","funder_id":"https://openalex.org/F4320320879","funder_display_name":"Deutsche Forschungsgemeinschaft"}],"funders":[{"id":"https://openalex.org/F4320320879","display_name":"Deutsche Forschungsgemeinschaft","ror":"https://ror.org/018mejw64"},{"id":"https://openalex.org/F4320334812","display_name":"Comisi\u00f3n Nacional de Investigaci\u00f3n Cient\u00edfica y Tecnol\u00f3gica","ror":"https://ror.org/02ap3w078"}],"has_content":{"grobid_xml":false,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W2792217087.pdf"},"referenced_works_count":43,"referenced_works":["https://openalex.org/W50296447","https://openalex.org/W121023703","https://openalex.org/W197704362","https://openalex.org/W620953717","https://openalex.org/W1497437446","https://openalex.org/W1529399279","https://openalex.org/W1594602740","https://openalex.org/W1655830068","https://openalex.org/W1688666919","https://openalex.org/W1969685488","https://openalex.org/W1977655452","https://openalex.org/W1983801113","https://openalex.org/W2044320213","https://openalex.org/W2046837952","https://openalex.org/W2081030963","https://openalex.org/W2095989982","https://openalex.org/W2098441518","https://openalex.org/W2098686263","https://openalex.org/W2101786389","https://openalex.org/W2108291454","https://openalex.org/W2115494899","https://openalex.org/W2121728202","https://openalex.org/W2121863487","https://openalex.org/W2124267516","https://openalex.org/W2129659607","https://openalex.org/W2130709397","https://openalex.org/W2137375617","https://openalex.org/W2145339207","https://openalex.org/W2233997862","https://openalex.org/W2237691826","https://openalex.org/W2290053245","https://openalex.org/W2327881649","https://openalex.org/W2547139900","https://openalex.org/W2559960928","https://openalex.org/W2563829177","https://openalex.org/W2620645529","https://openalex.org/W2760506156","https://openalex.org/W2965916140","https://openalex.org/W3139377883","https://openalex.org/W4214717370","https://openalex.org/W4233071312","https://openalex.org/W4253068843","https://openalex.org/W4292927920"],"related_works":["https://openalex.org/W2130264791","https://openalex.org/W4235622043","https://openalex.org/W2155887593","https://openalex.org/W2102428166","https://openalex.org/W1600547024","https://openalex.org/W3125491562","https://openalex.org/W2239395319","https://openalex.org/W2573907782","https://openalex.org/W2980420562","https://openalex.org/W2141338891"],"abstract_inverted_index":{"Interactive":[0],"reinforcement":[1,16],"learning":[2,17],"(IRL)":[3],"has":[4],"become":[5,99],"an":[6,74,113],"important":[7],"apprenticeship":[8,160],"approach":[9],"to":[10,35,40,71,91,98,109,116,129,152],"speed":[11],"up":[12],"convergence":[13,122],"in":[14,64,134,150,158],"classic":[15],"(RL)":[18],"problems.":[19],"In":[20,79],"this":[21,80],"regard,":[22],"a":[23,32,100,104,110,117,130],"variant":[24],"of":[25,88,123,136,141,165],"IRL":[26],"is":[27,167],"policy":[28],"shaping":[29],"which":[30,63,93],"uses":[31],"parent-like":[33],"trainer":[34,57],"propose":[36],"the":[37,48,56,124,137,142,159,163],"next":[38],"action":[39],"be":[41,59],"performed":[42],"and":[43,86,120,127],"by":[44,51],"doing":[45],"so":[46],"reduces":[47],"search":[49],"space":[50],"advice.":[52],"On":[53],"some":[54],"occasions,":[55],"may":[58,95],"another":[60],"artificial":[61,89],"agent":[62,94],"turn":[65],"was":[66],"trained":[67],"using":[68],"RL":[69],"methods":[70],"afterward":[72],"becoming":[73],"advisor":[75,114],"for":[76],"other":[77],"learner-agents.":[78,143],"work,":[81],"we":[82,145],"analyse":[83,146],"internal":[84],"representations":[85],"characteristics":[87],"agents":[90],"determine":[92,153],"outperform":[96],"others":[97],"better":[101],"trainer-agent.":[102],"Using":[103],"polymath":[105],"agent,":[106,112],"as":[107],"compared":[108],"specialist":[111],"leads":[115],"larger":[118],"reward":[119,125],"faster":[121],"signal":[126],"also":[128],"more":[131,169],"stable":[132],"behaviour":[133],"terms":[135],"state":[138],"visit":[139],"frequency":[140],"Moreover,":[144],"system":[147],"interaction":[148],"parameters":[149],"order":[151],"how":[154],"influential":[155],"they":[156],"are":[157],"process,":[161],"where":[162],"consistency":[164],"feedback":[166],"much":[168],"relevant":[170],"when":[171],"dealing":[172],"with":[173],"different":[174],"learner":[175],"obedience":[176],"parameters.":[177]},"counts_by_year":[{"year":2025,"cited_by_count":3},{"year":2024,"cited_by_count":5},{"year":2023,"cited_by_count":3},{"year":2022,"cited_by_count":4},{"year":2021,"cited_by_count":18},{"year":2020,"cited_by_count":8},{"year":2019,"cited_by_count":3},{"year":2018,"cited_by_count":1}],"updated_date":"2026-05-22T06:13:13.366637","created_date":"2025-10-10T00:00:00"}
