{"id":"https://openalex.org/W3154061550","doi":"https://doi.org/10.1109/cog52621.2021.9619033","title":"Learning on a Budget via Teacher Imitation","display_name":"Learning on a Budget via Teacher Imitation","publication_year":2021,"publication_date":"2021-08-17","ids":{"openalex":"https://openalex.org/W3154061550","doi":"https://doi.org/10.1109/cog52621.2021.9619033","mag":"3154061550"},"language":"en","primary_location":{"id":"doi:10.1109/cog52621.2021.9619033","is_oa":false,"landing_page_url":"https://doi.org/10.1109/cog52621.2021.9619033","pdf_url":null,"source":{"id":"https://openalex.org/S4363608335","display_name":"2021 IEEE Conference on Games (CoG)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2021 IEEE Conference on Games (CoG)","raw_type":"proceedings-article"},"type":"preprint","indexed_in":["arxiv","crossref","datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/2104.08440","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5101585566","display_name":"Erc\u00fcment \u0130lhan","orcid":"https://orcid.org/0000-0003-0400-0043"},"institutions":[{"id":"https://openalex.org/I166337079","display_name":"Queen Mary University of London","ror":"https://ror.org/026zzn846","country_code":"GB","type":"education","lineage":["https://openalex.org/I124357947","https://openalex.org/I166337079"]}],"countries":["GB"],"is_corresponding":true,"raw_author_name":"Ercument Ilhan","raw_affiliation_strings":["Queen Mary University of London,School of Electronic Engineering and Computer Science,London,United Kingdom"],"affiliations":[{"raw_affiliation_string":"Queen Mary University of London,School of Electronic Engineering and Computer Science,London,United Kingdom","institution_ids":["https://openalex.org/I166337079"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5008957116","display_name":"Jeremy Gow","orcid":"https://orcid.org/0009-0004-2768-6898"},"institutions":[{"id":"https://openalex.org/I166337079","display_name":"Queen Mary University of London","ror":"https://ror.org/026zzn846","country_code":"GB","type":"education","lineage":["https://openalex.org/I124357947","https://openalex.org/I166337079"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Jeremy Gow","raw_affiliation_strings":["Queen Mary University of London,School of Electronic Engineering and Computer Science,London,United Kingdom"],"affiliations":[{"raw_affiliation_string":"Queen Mary University of London,School of Electronic Engineering and Computer Science,London,United Kingdom","institution_ids":["https://openalex.org/I166337079"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5058274276","display_name":"Diego P\u00e9rez-Li\u00e9bana","orcid":"https://orcid.org/0000-0003-1958-0212"},"institutions":[{"id":"https://openalex.org/I166337079","display_name":"Queen Mary University of London","ror":"https://ror.org/026zzn846","country_code":"GB","type":"education","lineage":["https://openalex.org/I124357947","https://openalex.org/I166337079"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Diego Perez-Liebana","raw_affiliation_strings":["Queen Mary University of London,School of Electronic Engineering and Computer Science,London,United Kingdom"],"affiliations":[{"raw_affiliation_string":"Queen Mary University of London,School of Electronic Engineering and Computer Science,London,United Kingdom","institution_ids":["https://openalex.org/I166337079"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5101585566"],"corresponding_institution_ids":["https://openalex.org/I166337079"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":true,"cited_by_count":0,"citation_normalized_percentile":{"value":0.02475751,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"8"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11975","display_name":"Evolutionary Algorithms and Applications","score":0.9947999715805054,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11574","display_name":"Artificial Intelligence in Games","score":0.9789999723434448,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8163281679153442},{"id":"https://openalex.org/keywords/reuse","display_name":"Reuse","score":0.7210527658462524},{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.7105355262756348},{"id":"https://openalex.org/keywords/construct","display_name":"Construct (python library)","score":0.6484241485595703},{"id":"https://openalex.org/keywords/advice","display_name":"Advice (programming)","score":0.6287148594856262},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.6131170988082886},{"id":"https://openalex.org/keywords/imitation","display_name":"Imitation","score":0.5938913226127625},{"id":"https://openalex.org/keywords/perspective","display_name":"Perspective (graphical)","score":0.5528671741485596},{"id":"https://openalex.org/keywords/competitor-analysis","display_name":"Competitor analysis","score":0.5382210612297058},{"id":"https://openalex.org/keywords/action","display_name":"Action (physics)","score":0.513027548789978},{"id":"https://openalex.org/keywords/transfer-of-learning","display_name":"Transfer of learning","score":0.5113481879234314},{"id":"https://openalex.org/keywords/hyperparameter","display_name":"Hyperparameter","score":0.48422932624816895},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.46026521921157837},{"id":"https://openalex.org/keywords/human\u2013computer-interaction","display_name":"Human\u2013computer interaction","score":0.4583640992641449},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.3490350842475891},{"id":"https://openalex.org/keywords/psychology","display_name":"Psychology","score":0.08580097556114197}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8163281679153442},{"id":"https://openalex.org/C206588197","wikidata":"https://www.wikidata.org/wiki/Q846574","display_name":"Reuse","level":2,"score":0.7210527658462524},{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.7105355262756348},{"id":"https://openalex.org/C2780801425","wikidata":"https://www.wikidata.org/wiki/Q5164392","display_name":"Construct (python library)","level":2,"score":0.6484241485595703},{"id":"https://openalex.org/C2779955035","wikidata":"https://www.wikidata.org/wiki/Q4686785","display_name":"Advice (programming)","level":2,"score":0.6287148594856262},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.6131170988082886},{"id":"https://openalex.org/C126388530","wikidata":"https://www.wikidata.org/wiki/Q1131737","display_name":"Imitation","level":2,"score":0.5938913226127625},{"id":"https://openalex.org/C12713177","wikidata":"https://www.wikidata.org/wiki/Q1900281","display_name":"Perspective (graphical)","level":2,"score":0.5528671741485596},{"id":"https://openalex.org/C127576917","wikidata":"https://www.wikidata.org/wiki/Q624630","display_name":"Competitor analysis","level":2,"score":0.5382210612297058},{"id":"https://openalex.org/C2780791683","wikidata":"https://www.wikidata.org/wiki/Q846785","display_name":"Action (physics)","level":2,"score":0.513027548789978},{"id":"https://openalex.org/C150899416","wikidata":"https://www.wikidata.org/wiki/Q1820378","display_name":"Transfer of learning","level":2,"score":0.5113481879234314},{"id":"https://openalex.org/C8642999","wikidata":"https://www.wikidata.org/wiki/Q4171168","display_name":"Hyperparameter","level":2,"score":0.48422932624816895},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.46026521921157837},{"id":"https://openalex.org/C107457646","wikidata":"https://www.wikidata.org/wiki/Q207434","display_name":"Human\u2013computer interaction","level":1,"score":0.4583640992641449},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.3490350842475891},{"id":"https://openalex.org/C15744967","wikidata":"https://www.wikidata.org/wiki/Q9418","display_name":"Psychology","level":0,"score":0.08580097556114197},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0},{"id":"https://openalex.org/C77805123","wikidata":"https://www.wikidata.org/wiki/Q161272","display_name":"Social psychology","level":1,"score":0.0},{"id":"https://openalex.org/C18903297","wikidata":"https://www.wikidata.org/wiki/Q7150","display_name":"Ecology","level":1,"score":0.0},{"id":"https://openalex.org/C187736073","wikidata":"https://www.wikidata.org/wiki/Q2920921","display_name":"Management","level":1,"score":0.0},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0}],"mesh":[],"locations_count":5,"locations":[{"id":"doi:10.1109/cog52621.2021.9619033","is_oa":false,"landing_page_url":"https://doi.org/10.1109/cog52621.2021.9619033","pdf_url":null,"source":{"id":"https://openalex.org/S4363608335","display_name":"2021 IEEE Conference on Games (CoG)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2021 IEEE Conference on Games (CoG)","raw_type":"proceedings-article"},{"id":"pmh:oai:arXiv.org:2104.08440","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2104.08440","pdf_url":"https://arxiv.org/pdf/2104.08440","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"text"},{"id":"pmh:oai:qmro.qmul.ac.uk:123456789/77823","is_oa":true,"landing_page_url":"https://qmro.qmul.ac.uk/xmlui/handle/123456789/77823","pdf_url":"https://qmro.qmul.ac.uk/xmlui/bitstream/123456789/77823/2/Perez%20Learning%20on%20a%20Budget%202021%20Accepted.pdf","source":{"id":"https://openalex.org/S4306400530","display_name":"Queen Mary Research Online (Queen Mary University of London)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I166337079","host_organization_name":"Queen Mary University of London","host_organization_lineage":["https://openalex.org/I166337079"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"Conference Proceeding"},{"id":"mag:3154061550","is_oa":true,"landing_page_url":"http://export.arxiv.org/pdf/2104.08440","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"arXiv (Cornell University)","raw_type":null},{"id":"doi:10.48550/arxiv.2104.08440","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2104.08440","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:2104.08440","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2104.08440","pdf_url":"https://arxiv.org/pdf/2104.08440","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"text"},"sustainable_development_goals":[],"awards":[],"funders":[{"id":"https://openalex.org/F4320311061","display_name":"Queen Mary University of London","ror":"https://ror.org/026zzn846"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":47,"referenced_works":["https://openalex.org/W1529399279","https://openalex.org/W1757796397","https://openalex.org/W1969685488","https://openalex.org/W2031727428","https://openalex.org/W2051228319","https://openalex.org/W2121863487","https://openalex.org/W2148112459","https://openalex.org/W2150468603","https://openalex.org/W2155968351","https://openalex.org/W2173564293","https://openalex.org/W2563829177","https://openalex.org/W2620645529","https://openalex.org/W2735506162","https://openalex.org/W2786672974","https://openalex.org/W2963376229","https://openalex.org/W2964059111","https://openalex.org/W2964067469","https://openalex.org/W2964291307","https://openalex.org/W2965435131","https://openalex.org/W2976108375","https://openalex.org/W2981030070","https://openalex.org/W2982316857","https://openalex.org/W2982852993","https://openalex.org/W2996868001","https://openalex.org/W3037582147","https://openalex.org/W3089441222","https://openalex.org/W3103780890","https://openalex.org/W3118210634","https://openalex.org/W3152502122","https://openalex.org/W3173894685","https://openalex.org/W3200878391","https://openalex.org/W6617145748","https://openalex.org/W6631533588","https://openalex.org/W6637967152","https://openalex.org/W6685444567","https://openalex.org/W6730844258","https://openalex.org/W6741054924","https://openalex.org/W6744838376","https://openalex.org/W6748645729","https://openalex.org/W6748816842","https://openalex.org/W6756303580","https://openalex.org/W6762257688","https://openalex.org/W6766130189","https://openalex.org/W6769831145","https://openalex.org/W6780070463","https://openalex.org/W6788072495","https://openalex.org/W6793862206"],"related_works":["https://openalex.org/W2951504627","https://openalex.org/W2615334618","https://openalex.org/W3092270557","https://openalex.org/W2121736818","https://openalex.org/W2560186857","https://openalex.org/W2296212956","https://openalex.org/W3162183426","https://openalex.org/W1618761262","https://openalex.org/W3003915040","https://openalex.org/W3096386778","https://openalex.org/W1553039381","https://openalex.org/W3136629921","https://openalex.org/W2796463237","https://openalex.org/W3165299215","https://openalex.org/W1971690354","https://openalex.org/W2581859532","https://openalex.org/W2956596508","https://openalex.org/W98262826","https://openalex.org/W1851637725","https://openalex.org/W2958095304"],"abstract_inverted_index":{"Deep":[0],"Reinforcement":[1],"Learning":[2],"(RL)":[3],"techniques":[4],"can":[5,13],"benefit":[6],"greatly":[7],"from":[8,19,85,112],"leveraging":[9],"prior":[10],"experience,":[11],"which":[12],"be":[14,197,207],"either":[15,183],"self-generated":[16],"or":[17,185],"acquired":[18],"other":[20],"entities.":[21],"Action":[22],"advising":[23],"is":[24,55,62],"a":[25,29,58,130,145],"framework":[26],"that":[27,79,133,180],"provides":[28],"flexible":[30],"way":[31],"to":[32,46,64,99,109,128,147,157,161,163,196,206],"transfer":[33],"such":[34],"knowledge":[35],"in":[36,67,174],"the":[37,47,50,68,86,100,110,120,150],"form":[38],"of":[39,52,122,153],"actions":[40],"between":[41],"teacher-student":[42],"peers.":[43],"However,":[44],"due":[45],"realistic":[48],"concerns,":[49],"number":[51],"these":[53,66,154],"interactions":[54],"limited":[56],"with":[57,166,188],"budget;":[59],"therefore,":[60],"it":[61,97,159],"crucial":[63],"perform":[65],"most":[69],"appropriate":[70],"moments.":[71],"There":[72],"have":[73,93],"been":[74],"several":[75],"promising":[76],"studies":[77],"recently":[78],"address":[80],"this":[81,116],"problem":[82],"setting":[83],"especially":[84],"student's":[87],"perspective.":[88],"Despite":[89],"their":[90],"success,":[91],"they":[92],"some":[94],"shortcomings":[95],"when":[96],"comes":[98],"practical":[101],"applicability":[102],"and":[103,138],"integrity":[104],"as":[105],"an":[106],"overall":[107],"solution":[108],"learning":[111],"advice":[113,123,136,139],"challenge.":[114],"In":[115],"paper,":[117],"we":[118,172],"extend":[119],"idea":[121],"reusing":[124],"via":[125],"teacher":[126],"imitation":[127],"construct":[129],"unified":[131],"approach":[132],"addresses":[134],"both":[135],"collection":[137],"utilisation":[140],"problems.":[141],"We":[142],"also":[143,204],"propose":[144],"method":[146],"automatically":[148],"tune":[149],"relevant":[151],"hyperparameters":[152],"components":[155,202],"on-the-fly":[156],"make":[158],"able":[160],"adapt":[162],"any":[164],"task":[165],"minimal":[167],"human":[168],"intervention.":[169],"The":[170],"experiments":[171],"performed":[173],"5":[175],"different":[176],"Atari":[177],"games":[178],"verify":[179],"our":[181],"algorithm":[182],"surpasses":[184],"performs":[186],"on-par":[187],"its":[189,200],"top":[190],"competitors":[191],"while":[192],"being":[193],"far":[194],"simpler":[195],"employed.":[198],"Furthermore,":[199],"individual":[201],"are":[203],"found":[205],"providing":[208],"significant":[209],"advantages":[210],"alone.":[211]},"counts_by_year":[],"updated_date":"2026-03-11T14:59:36.786465","created_date":"2025-10-10T00:00:00"}
