{"id":"https://openalex.org/W2978336958","doi":"https://doi.org/10.1109/ijcnn.2019.8852326","title":"Exploiting Action-Value Uncertainty to Drive Exploration in Reinforcement Learning","display_name":"Exploiting Action-Value Uncertainty to Drive Exploration in Reinforcement Learning","publication_year":2019,"publication_date":"2019-07-01","ids":{"openalex":"https://openalex.org/W2978336958","doi":"https://doi.org/10.1109/ijcnn.2019.8852326","mag":"2978336958"},"language":"en","primary_location":{"id":"doi:10.1109/ijcnn.2019.8852326","is_oa":false,"landing_page_url":"https://doi.org/10.1109/ijcnn.2019.8852326","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2019 International Joint Conference on Neural Networks (IJCNN)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5063752250","display_name":"Carlo D\u2019Eramo","orcid":"https://orcid.org/0000-0003-2712-118X"},"institutions":[{"id":"https://openalex.org/I93860229","display_name":"Politecnico di Milano","ror":"https://ror.org/01nffqt88","country_code":"IT","type":"education","lineage":["https://openalex.org/I93860229"]}],"countries":["IT"],"is_corresponding":true,"raw_author_name":"Carlo D'Eramo","raw_affiliation_strings":["DEIB, Politecnico di Milano, Milano, Italy"],"affiliations":[{"raw_affiliation_string":"DEIB, Politecnico di Milano, Milano, Italy","institution_ids":["https://openalex.org/I93860229"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5084050368","display_name":"Andrea Cini","orcid":"https://orcid.org/0000-0003-3219-9360"},"institutions":[{"id":"https://openalex.org/I93860229","display_name":"Politecnico di Milano","ror":"https://ror.org/01nffqt88","country_code":"IT","type":"education","lineage":["https://openalex.org/I93860229"]}],"countries":["IT"],"is_corresponding":false,"raw_author_name":"Andrea Cini","raw_affiliation_strings":["DEIB, Politecnico di Milano, Milano, Italy"],"affiliations":[{"raw_affiliation_string":"DEIB, Politecnico di Milano, Milano, Italy","institution_ids":["https://openalex.org/I93860229"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5017130830","display_name":"Marcello Restelli","orcid":"https://orcid.org/0000-0002-6322-1076"},"institutions":[{"id":"https://openalex.org/I93860229","display_name":"Politecnico di Milano","ror":"https://ror.org/01nffqt88","country_code":"IT","type":"education","lineage":["https://openalex.org/I93860229"]}],"countries":["IT"],"is_corresponding":false,"raw_author_name":"Marcello Restelli","raw_affiliation_strings":["DEIB, Politecnico di Milano, Milano, Italy"],"affiliations":[{"raw_affiliation_string":"DEIB, Politecnico di Milano, Milano, Italy","institution_ids":["https://openalex.org/I93860229"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5063752250"],"corresponding_institution_ids":["https://openalex.org/I93860229"],"apc_list":null,"apc_paid":null,"fwci":0.7922,"has_fulltext":false,"cited_by_count":8,"citation_normalized_percentile":{"value":0.76673058,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":96},"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"8"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12101","display_name":"Advanced Bandit Algorithms Research","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},"topics":[{"id":"https://openalex.org/T12101","display_name":"Advanced Bandit Algorithms Research","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9962999820709229,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10603","display_name":"Smart Grid Energy Management","score":0.9948999881744385,"subfield":{"id":"https://openalex.org/subfields/2208","display_name":"Electrical and Electronic Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.8940881490707397},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7680114507675171},{"id":"https://openalex.org/keywords/set","display_name":"Set (abstract data type)","score":0.5403940677642822},{"id":"https://openalex.org/keywords/curse-of-dimensionality","display_name":"Curse of dimensionality","score":0.5342822074890137},{"id":"https://openalex.org/keywords/dilemma","display_name":"Dilemma","score":0.5217926502227783},{"id":"https://openalex.org/keywords/context","display_name":"Context (archaeology)","score":0.5135726928710938},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.511148989200592},{"id":"https://openalex.org/keywords/action","display_name":"Action (physics)","score":0.4422123432159424},{"id":"https://openalex.org/keywords/thompson-sampling","display_name":"Thompson sampling","score":0.4157668352127075},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.4019168019294739},{"id":"https://openalex.org/keywords/mathematical-optimization","display_name":"Mathematical optimization","score":0.3716558516025543},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.11468309164047241},{"id":"https://openalex.org/keywords/bayesian-probability","display_name":"Bayesian probability","score":0.10914584994316101}],"concepts":[{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.8940881490707397},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7680114507675171},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.5403940677642822},{"id":"https://openalex.org/C111030470","wikidata":"https://www.wikidata.org/wiki/Q1430460","display_name":"Curse of dimensionality","level":2,"score":0.5342822074890137},{"id":"https://openalex.org/C2778496695","wikidata":"https://www.wikidata.org/wiki/Q254128","display_name":"Dilemma","level":2,"score":0.5217926502227783},{"id":"https://openalex.org/C2779343474","wikidata":"https://www.wikidata.org/wiki/Q3109175","display_name":"Context (archaeology)","level":2,"score":0.5135726928710938},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.511148989200592},{"id":"https://openalex.org/C2780791683","wikidata":"https://www.wikidata.org/wiki/Q846785","display_name":"Action (physics)","level":2,"score":0.4422123432159424},{"id":"https://openalex.org/C73602740","wikidata":"https://www.wikidata.org/wiki/Q7795822","display_name":"Thompson sampling","level":3,"score":0.4157668352127075},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.4019168019294739},{"id":"https://openalex.org/C126255220","wikidata":"https://www.wikidata.org/wiki/Q141495","display_name":"Mathematical optimization","level":1,"score":0.3716558516025543},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.11468309164047241},{"id":"https://openalex.org/C107673813","wikidata":"https://www.wikidata.org/wiki/Q812534","display_name":"Bayesian probability","level":2,"score":0.10914584994316101},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C2524010","wikidata":"https://www.wikidata.org/wiki/Q8087","display_name":"Geometry","level":1,"score":0.0},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0},{"id":"https://openalex.org/C151730666","wikidata":"https://www.wikidata.org/wiki/Q7205","display_name":"Paleontology","level":1,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1109/ijcnn.2019.8852326","is_oa":false,"landing_page_url":"https://doi.org/10.1109/ijcnn.2019.8852326","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2019 International Joint Conference on Neural Networks (IJCNN)","raw_type":"proceedings-article"},{"id":"pmh:oai:re.public.polimi.it:11311/1121459","is_oa":false,"landing_page_url":"http://hdl.handle.net/11311/1121459","pdf_url":null,"source":{"id":"https://openalex.org/S4306400312","display_name":"Virtual Community of Pathological Anatomy (University of Castilla La Mancha)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I79189158","host_organization_name":"University of Castilla-La Mancha","host_organization_lineage":["https://openalex.org/I79189158"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"info:eu-repo/semantics/conferenceObject"}],"best_oa_location":null,"sustainable_development_goals":[{"score":0.6899999976158142,"display_name":"Peace, Justice and strong institutions","id":"https://metadata.un.org/sdg/16"}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":62,"referenced_works":["https://openalex.org/W107583932","https://openalex.org/W172298727","https://openalex.org/W1496855202","https://openalex.org/W1542595278","https://openalex.org/W1582436621","https://openalex.org/W1591803298","https://openalex.org/W1850488217","https://openalex.org/W1980516134","https://openalex.org/W2009551863","https://openalex.org/W2030501831","https://openalex.org/W2039522160","https://openalex.org/W2097931172","https://openalex.org/W2103012681","https://openalex.org/W2108738385","https://openalex.org/W2111764152","https://openalex.org/W2115211925","https://openalex.org/W2124352385","https://openalex.org/W2129670787","https://openalex.org/W2139612737","https://openalex.org/W2145339207","https://openalex.org/W2155968351","https://openalex.org/W2158125716","https://openalex.org/W2161966552","https://openalex.org/W2168405694","https://openalex.org/W2169596848","https://openalex.org/W2173564293","https://openalex.org/W2280163991","https://openalex.org/W2469051754","https://openalex.org/W2489939061","https://openalex.org/W2561776174","https://openalex.org/W2746553466","https://openalex.org/W2751973545","https://openalex.org/W2949475445","https://openalex.org/W2951799221","https://openalex.org/W2962767126","https://openalex.org/W2963169817","https://openalex.org/W2963276097","https://openalex.org/W2963523627","https://openalex.org/W2963938771","https://openalex.org/W3020125231","https://openalex.org/W4214717370","https://openalex.org/W4233413206","https://openalex.org/W4234228486","https://openalex.org/W4285719527","https://openalex.org/W4293871015","https://openalex.org/W6604362843","https://openalex.org/W6629614444","https://openalex.org/W6632504492","https://openalex.org/W6634907821","https://openalex.org/W6635327058","https://openalex.org/W6639039794","https://openalex.org/W6674777300","https://openalex.org/W6676077707","https://openalex.org/W6676833704","https://openalex.org/W6683603353","https://openalex.org/W6685444567","https://openalex.org/W6695011786","https://openalex.org/W6717230150","https://openalex.org/W6730641667","https://openalex.org/W6739455066","https://openalex.org/W7000276917","https://openalex.org/W7062032628"],"related_works":["https://openalex.org/W2350209916","https://openalex.org/W2475524763","https://openalex.org/W2363851383","https://openalex.org/W4296209631","https://openalex.org/W2354517252","https://openalex.org/W2387460998","https://openalex.org/W2356631479","https://openalex.org/W2386467653","https://openalex.org/W2482596426","https://openalex.org/W4386266213"],"abstract_inverted_index":{"Most":[0],"of":[1,22,34,40,51,84,106,112,117,177,183],"the":[2,15,18,29,32,38,49,82,104,107,110,153,181],"research":[3],"in":[4,79,86,93,114,139,144,190],"Reinforcement":[5],"Learning":[6],"(RL)":[7],"focuses":[8],"on":[9,172],"balancing":[10],"exploration":[11],"and":[12,37,141,155,160,164,192],"exploitation.":[13],"Indeed,":[14],"reasons":[16],"for":[17],"success":[19,83],"or":[20],"failure":[21],"an":[23,169,173],"RL":[24,140,143,178,195],"algorithm":[25],"often":[26],"deal":[27],"with":[28],"choice":[30],"between":[31],"execution":[33],"exploratory":[35],"actions":[36,41],"exploitation":[39],"that":[42],"are":[43],"known":[44],"to":[45,99,136],"be":[46],"good.":[47],"In":[48,61],"context":[50],"Multi-Armed":[52],"Bandits":[53],"(MABs),":[54],"many":[55],"algorithms":[56,135,151],"have":[57],"addressed":[58],"this":[59],"dilemma.":[60],"particular,":[62],"Thompson":[63],"Sampling":[64],"(TS)":[65],"is":[66],"a":[67,125,145],"solution":[68],"that,":[69],"besides":[70],"having":[71],"good":[72],"theoretical":[73,156],"properties,":[74],"usually":[75],"works":[76],"very":[77,101],"well":[78],"practice.":[80],"Unfortunately,":[81],"TS":[85,113,138,184],"MAB":[87],"problems":[88],"has":[89,97],"not":[90],"been":[91],"replicated":[92],"RL,":[94,115],"where":[95],"it":[96],"shown":[98],"scale":[100],"poorly":[102],"w.r.t.":[103,185],"dimensionality":[105],"problem.":[108],"Nevertheless,":[109],"application":[111],"instead":[116],"more":[118,193],"myopic":[119],"strategies":[120,188],"such":[121,131],"as":[122],"\u03b5-greedy,":[123],"remains":[124],"promising":[126],"solution.":[127],"This":[128],"paper":[129],"addresses":[130],"issue":[132],"proposing":[133],"several":[134],"use":[137],"deep":[142],"feasible":[146],"way.":[147],"We":[148],"present":[149],"these":[150],"explaining":[152],"intuitions":[154],"considerations":[157],"behind":[158],"them":[159],"discussing":[161],"their":[162],"advantages":[163],"drawbacks.":[165],"Furthermore,":[166],"we":[167],"provide":[168],"empirical":[170],"evaluation":[171],"increasingly":[174],"complex":[175],"set":[176],"problems,":[179],"showing":[180],"benefit":[182],"other":[186],"sampling":[187],"available":[189],"classical":[191],"recent":[194],"literature.":[196]},"counts_by_year":[{"year":2025,"cited_by_count":1},{"year":2024,"cited_by_count":2},{"year":2023,"cited_by_count":1},{"year":2022,"cited_by_count":2},{"year":2021,"cited_by_count":2}],"updated_date":"2025-12-11T00:21:10.989143","created_date":"2025-10-10T00:00:00"}
