{"id":"https://openalex.org/W3089441222","doi":"https://doi.org/10.1109/tg.2021.3113644","title":"Student-Initiated Action Advising via Advice Novelty","display_name":"Student-Initiated Action Advising via Advice Novelty","publication_year":2021,"publication_date":"2021-09-20","ids":{"openalex":"https://openalex.org/W3089441222","doi":"https://doi.org/10.1109/tg.2021.3113644","mag":"3089441222"},"language":"en","primary_location":{"id":"doi:10.1109/tg.2021.3113644","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tg.2021.3113644","pdf_url":null,"source":{"id":"https://openalex.org/S4210224842","display_name":"IEEE Transactions on Games","issn_l":"2475-1502","issn":["2475-1502","2475-1510"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Games","raw_type":"journal-article"},"type":"preprint","indexed_in":["arxiv","crossref","datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/2010.00381","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5101585566","display_name":"Erc\u00fcment \u0130lhan","orcid":"https://orcid.org/0000-0003-0400-0043"},"institutions":[{"id":"https://openalex.org/I166337079","display_name":"Queen Mary University of London","ror":"https://ror.org/026zzn846","country_code":"GB","type":"education","lineage":["https://openalex.org/I124357947","https://openalex.org/I166337079"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Ercument Ilhan","raw_affiliation_strings":["School of Electronic Engineering and Computer Science, Queen Mary University of London, London, U.K"],"raw_orcid":"https://orcid.org/0000-0003-0400-0043","affiliations":[{"raw_affiliation_string":"School of Electronic Engineering and Computer Science, Queen Mary University of London, London, U.K","institution_ids":["https://openalex.org/I166337079"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5008957116","display_name":"Jeremy Gow","orcid":"https://orcid.org/0009-0004-2768-6898"},"institutions":[{"id":"https://openalex.org/I166337079","display_name":"Queen Mary University of London","ror":"https://ror.org/026zzn846","country_code":"GB","type":"education","lineage":["https://openalex.org/I124357947","https://openalex.org/I166337079"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Jeremy Gow","raw_affiliation_strings":["Department of Electronic Engineering and Computer Science, Queen Mary University of London, London, U.K"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Department of Electronic Engineering and Computer Science, Queen Mary University of London, London, U.K","institution_ids":["https://openalex.org/I166337079"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5058274276","display_name":"Diego P\u00e9rez-Li\u00e9bana","orcid":"https://orcid.org/0000-0003-1958-0212"},"institutions":[{"id":"https://openalex.org/I166337079","display_name":"Queen Mary University of London","ror":"https://ror.org/026zzn846","country_code":"GB","type":"education","lineage":["https://openalex.org/I124357947","https://openalex.org/I166337079"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Diego Perez","raw_affiliation_strings":["School of Electronic Engineering and Computer Science, Queen Mary University of London, London, U.K"],"raw_orcid":"https://orcid.org/0000-0003-1958-0212","affiliations":[{"raw_affiliation_string":"School of Electronic Engineering and Computer Science, Queen Mary University of London, London, U.K","institution_ids":["https://openalex.org/I166337079"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":1,"corresponding_author_ids":[],"corresponding_institution_ids":["https://openalex.org/I166337079"],"apc_list":null,"apc_paid":null,"fwci":0.4086,"has_fulltext":false,"cited_by_count":4,"citation_normalized_percentile":{"value":0.65866828,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":95},"biblio":{"volume":"14","issue":"3","first_page":"522","last_page":"532"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12101","display_name":"Advanced Bandit Algorithms Research","score":0.9934999942779541,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T12072","display_name":"Machine Learning and Algorithms","score":0.9889000058174133,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/novelty","display_name":"Novelty","score":0.8153266906738281},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7104109525680542},{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.6498273611068726},{"id":"https://openalex.org/keywords/inefficiency","display_name":"Inefficiency","score":0.6411920189857483},{"id":"https://openalex.org/keywords/leverage","display_name":"Leverage (statistics)","score":0.6305065155029297},{"id":"https://openalex.org/keywords/advice","display_name":"Advice (programming)","score":0.6145970821380615},{"id":"https://openalex.org/keywords/action","display_name":"Action (physics)","score":0.45327723026275635},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.38936445116996765},{"id":"https://openalex.org/keywords/psychology","display_name":"Psychology","score":0.24172773957252502},{"id":"https://openalex.org/keywords/social-psychology","display_name":"Social psychology","score":0.099090576171875}],"concepts":[{"id":"https://openalex.org/C2778738651","wikidata":"https://www.wikidata.org/wiki/Q16546687","display_name":"Novelty","level":2,"score":0.8153266906738281},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7104109525680542},{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.6498273611068726},{"id":"https://openalex.org/C2778869765","wikidata":"https://www.wikidata.org/wiki/Q6028363","display_name":"Inefficiency","level":2,"score":0.6411920189857483},{"id":"https://openalex.org/C153083717","wikidata":"https://www.wikidata.org/wiki/Q6535263","display_name":"Leverage (statistics)","level":2,"score":0.6305065155029297},{"id":"https://openalex.org/C2779955035","wikidata":"https://www.wikidata.org/wiki/Q4686785","display_name":"Advice (programming)","level":2,"score":0.6145970821380615},{"id":"https://openalex.org/C2780791683","wikidata":"https://www.wikidata.org/wiki/Q846785","display_name":"Action (physics)","level":2,"score":0.45327723026275635},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.38936445116996765},{"id":"https://openalex.org/C15744967","wikidata":"https://www.wikidata.org/wiki/Q9418","display_name":"Psychology","level":0,"score":0.24172773957252502},{"id":"https://openalex.org/C77805123","wikidata":"https://www.wikidata.org/wiki/Q161272","display_name":"Social psychology","level":1,"score":0.099090576171875},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C175444787","wikidata":"https://www.wikidata.org/wiki/Q39072","display_name":"Microeconomics","level":1,"score":0.0},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0}],"mesh":[],"locations_count":5,"locations":[{"id":"doi:10.1109/tg.2021.3113644","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tg.2021.3113644","pdf_url":null,"source":{"id":"https://openalex.org/S4210224842","display_name":"IEEE Transactions on Games","issn_l":"2475-1502","issn":["2475-1502","2475-1510"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Games","raw_type":"journal-article"},{"id":"pmh:oai:arXiv.org:2010.00381","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2010.00381","pdf_url":"https://arxiv.org/pdf/2010.00381","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},{"id":"mag:3089441222","is_oa":true,"landing_page_url":"https://arxiv.org/pdf/2010.00381.pdf","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"arXiv (Cornell University)","raw_type":null},{"id":"pmh:oai:qmro.qmul.ac.uk:123456789/76458","is_oa":false,"landing_page_url":"https://qmro.qmul.ac.uk/xmlui/handle/123456789/76458","pdf_url":null,"source":{"id":"https://openalex.org/S4306400530","display_name":"Queen Mary Research Online (Queen Mary University of London)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I166337079","host_organization_name":"Queen Mary University of London","host_organization_lineage":["https://openalex.org/I166337079"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"Article"},{"id":"doi:10.48550/arxiv.2010.00381","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2010.00381","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article-journal"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:2010.00381","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2010.00381","pdf_url":"https://arxiv.org/pdf/2010.00381","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},"sustainable_development_goals":[],"awards":[],"funders":[{"id":"https://openalex.org/F4320311061","display_name":"Queen Mary University of London","ror":"https://ror.org/026zzn846"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":59,"referenced_works":["https://openalex.org/W1529399279","https://openalex.org/W1757796397","https://openalex.org/W2148112459","https://openalex.org/W2155968351","https://openalex.org/W2173564293","https://openalex.org/W2563829177","https://openalex.org/W2620645529","https://openalex.org/W2735506162","https://openalex.org/W2739573821","https://openalex.org/W2745868649","https://openalex.org/W2746553466","https://openalex.org/W2761873684","https://openalex.org/W2772709170","https://openalex.org/W2788862220","https://openalex.org/W2903158431","https://openalex.org/W2903242674","https://openalex.org/W2921489897","https://openalex.org/W2921578896","https://openalex.org/W2963376229","https://openalex.org/W2963390684","https://openalex.org/W2963477884","https://openalex.org/W2963704132","https://openalex.org/W2963983495","https://openalex.org/W2964067469","https://openalex.org/W2964161785","https://openalex.org/W2964174623","https://openalex.org/W2964291307","https://openalex.org/W2965435131","https://openalex.org/W2971262355","https://openalex.org/W2976108375","https://openalex.org/W2981539019","https://openalex.org/W2982852993","https://openalex.org/W2995453501","https://openalex.org/W2996037775","https://openalex.org/W2996868001","https://openalex.org/W3099324303","https://openalex.org/W3100789280","https://openalex.org/W3152502122","https://openalex.org/W3173894685","https://openalex.org/W4246078117","https://openalex.org/W6631533588","https://openalex.org/W6637967152","https://openalex.org/W6682849425","https://openalex.org/W6685444567","https://openalex.org/W6687681856","https://openalex.org/W6704460705","https://openalex.org/W6730844258","https://openalex.org/W6740092555","https://openalex.org/W6741054924","https://openalex.org/W6746177919","https://openalex.org/W6756303580","https://openalex.org/W6756615331","https://openalex.org/W6757469721","https://openalex.org/W6760256100","https://openalex.org/W6760796257","https://openalex.org/W6763704811","https://openalex.org/W6769030669","https://openalex.org/W6772005887","https://openalex.org/W6793862206"],"related_works":["https://openalex.org/W3152502122","https://openalex.org/W3070006935","https://openalex.org/W2976108375","https://openalex.org/W3135313656","https://openalex.org/W2735506162","https://openalex.org/W3005521394","https://openalex.org/W2398817920","https://openalex.org/W3209811075","https://openalex.org/W3167570135","https://openalex.org/W1565384094","https://openalex.org/W2759104452","https://openalex.org/W3167061735","https://openalex.org/W3022194887","https://openalex.org/W2775713457","https://openalex.org/W2804726947","https://openalex.org/W2918241733","https://openalex.org/W121023703","https://openalex.org/W3094142671","https://openalex.org/W2162277224","https://openalex.org/W3121003321"],"abstract_inverted_index":{"Action":[0],"advising":[1],"is":[2,81],"a":[3,121,134,151],"budget-constrained":[4],"knowledge":[5],"exchange":[6],"mechanism":[7],"between":[8,98],"teacher\u2013student":[9],"peers":[10],"that":[11,29,54,137,167,191],"can":[12,68],"help":[13],"tackle":[14],"exploration":[15],"and":[16,33,101,186,203],"sample":[17],"inefficiency":[18],"problems":[19],"in":[20,71,107,112,124,182,207],"deep":[21],"reinforcement":[22],"learning":[23,171],"(RL).":[24],"Most":[25],"recently,":[26],"student-initiated":[27,135],"techniques":[28],"utilize":[30],"state":[31,199],"novelty":[32,149],"uncertainty":[34],"estimations":[35,46],"have":[36,47],"obtained":[37],"promising":[38],"results.":[39],"However,":[40],"the":[41,55,58,79,91,96,108,113,116,126,148,162,168,179,198,201,208,211],"approaches":[42],"built":[43],"on":[44,77,195],"these":[45,139],"some":[48],"potential":[49],"weaknesses.":[50],"First,":[51],"they":[52],"assume":[53],"convergence":[56],"of":[57,115,150,153,200],"student\u2019s":[59,169],"RL":[60,109],"model":[61,110],"implies":[62],"less":[63],"need":[64],"for":[65,161],"advice.":[66,154],"This":[67],"be":[69],"misleading":[70],"scenarios":[72,209],"with":[73,197],"teacher\u2019s":[74,92],"absence":[75],"early":[76],"where":[78,210],"student":[80,127],"likely":[82],"to":[83,104,146,165,177,216],"learn":[84],"suboptimally":[85],"by":[86,140],"itself;":[87],"yet":[88],"also":[89],"ignore":[90],"assistance":[93],"later.":[94],"Second,":[95],"delays":[97],"encountering":[99],"states":[100,164],"having":[102],"them":[103],"take":[105],"effect":[106],"updates":[111,159],"presence":[114],"experience":[117],"replay":[118],"dynamics":[119],"cause":[120],"feedback":[122],"lag":[123],"what":[125],"actually":[128],"needs":[129],"advice":[130],"for.":[131],"We":[132],"propose":[133],"algorithm":[136],"alleviates":[138],"employing":[141],"random":[142],"network":[143],"distillation":[144],"(RND)":[145],"measure":[147],"piece":[152],"Furthermore,":[155],"we":[156],"perform":[157],"RND":[158],"only":[160],"advised":[163],"ensure":[166],"own":[170],"does":[172],"not":[173],"impair":[174],"its":[175],"ability":[176],"leverage":[178],"teacher.":[180],"Experiments":[181],"<italic":[183,187],"xmlns:mml=\"http://www.w3.org/1998/Math/MathML\"":[184,188],"xmlns:xlink=\"http://www.w3.org/1999/xlink\">GridWorld</i>":[185],"xmlns:xlink=\"http://www.w3.org/1999/xlink\">MinAtar</i>":[189],"show":[190],"our":[192],"approach":[193],"performs":[194],"par":[196],"art":[202],"demonstrates":[204],"significant":[205],"advantages":[206],"existing":[212],"methods":[213],"are":[214],"prone":[215],"fail.":[217]},"counts_by_year":[{"year":2025,"cited_by_count":1},{"year":2024,"cited_by_count":1},{"year":2021,"cited_by_count":2}],"updated_date":"2026-07-03T08:13:44.112507","created_date":"2025-10-10T00:00:00"}
