{"id":"https://openalex.org/W3022923336","doi":"https://doi.org/10.3390/a13050118","title":"Distributional Reinforcement Learning with Ensembles","display_name":"Distributional Reinforcement Learning with Ensembles","publication_year":2020,"publication_date":"2020-05-07","ids":{"openalex":"https://openalex.org/W3022923336","doi":"https://doi.org/10.3390/a13050118","mag":"3022923336"},"language":"en","primary_location":{"id":"doi:10.3390/a13050118","is_oa":true,"landing_page_url":"https://doi.org/10.3390/a13050118","pdf_url":"https://www.mdpi.com/1999-4893/13/5/118/pdf?version=1590054661","source":{"id":"https://openalex.org/S190629608","display_name":"Algorithms","issn_l":"1999-4893","issn":["1999-4893"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310310987","host_organization_name":"Multidisciplinary Digital Publishing Institute","host_organization_lineage":["https://openalex.org/P4310310987"],"host_organization_lineage_names":["Multidisciplinary Digital Publishing Institute"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Algorithms","raw_type":"journal-article"},"type":"article","indexed_in":["arxiv","crossref","doaj"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://www.mdpi.com/1999-4893/13/5/118/pdf?version=1590054661","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":null,"display_name":"Bj\u00f6rn Lindenberg","orcid":"https://orcid.org/0000-0003-2756-5186"},"institutions":[{"id":"https://openalex.org/I223464139","display_name":"Linnaeus University","ror":"https://ror.org/00j9qag85","country_code":"SE","type":"education","lineage":["https://openalex.org/I223464139"]}],"countries":["SE"],"is_corresponding":true,"raw_author_name":"Bj\u00f6rn Lindenberg","raw_affiliation_strings":["Department of Mathematics, Linn\u00e6us University, 351 95 V\u00e4xj\u00f6, Sweden"],"affiliations":[{"raw_affiliation_string":"Department of Mathematics, Linn\u00e6us University, 351 95 V\u00e4xj\u00f6, Sweden","institution_ids":["https://openalex.org/I223464139"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Jonas Nordqvist","orcid":"https://orcid.org/0000-0002-0510-6782"},"institutions":[{"id":"https://openalex.org/I223464139","display_name":"Linnaeus University","ror":"https://ror.org/00j9qag85","country_code":"SE","type":"education","lineage":["https://openalex.org/I223464139"]}],"countries":["SE"],"is_corresponding":false,"raw_author_name":"Jonas Nordqvist","raw_affiliation_strings":["Department of Mathematics, Linn\u00e6us University, 351 95 V\u00e4xj\u00f6, Sweden"],"affiliations":[{"raw_affiliation_string":"Department of Mathematics, Linn\u00e6us University, 351 95 V\u00e4xj\u00f6, Sweden","institution_ids":["https://openalex.org/I223464139"]}]},{"author_position":"last","author":{"id":null,"display_name":"Karl-Olof Lindahl","orcid":"https://orcid.org/0000-0002-7825-4428"},"institutions":[{"id":"https://openalex.org/I223464139","display_name":"Linnaeus University","ror":"https://ror.org/00j9qag85","country_code":"SE","type":"education","lineage":["https://openalex.org/I223464139"]}],"countries":["SE"],"is_corresponding":false,"raw_author_name":"Karl-Olof Lindahl","raw_affiliation_strings":["Department of Mathematics, Linn\u00e6us University, 351 95 V\u00e4xj\u00f6, Sweden"],"affiliations":[{"raw_affiliation_string":"Department of Mathematics, Linn\u00e6us University, 351 95 V\u00e4xj\u00f6, Sweden","institution_ids":["https://openalex.org/I223464139"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":[],"corresponding_institution_ids":["https://openalex.org/I223464139"],"apc_list":{"value":1400,"currency":"CHF","value_usd":1515},"apc_paid":{"value":983,"currency":"EUR","value_usd":1060},"fwci":0.1371,"has_fulltext":false,"cited_by_count":2,"citation_normalized_percentile":{"value":0.53580807,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":95},"biblio":{"volume":"13","issue":"5","first_page":"118","last_page":"118"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.8557000160217285,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.8557000160217285,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11612","display_name":"Stochastic Gradient Optimization Techniques","score":0.018799999728798866,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10848","display_name":"Advanced Multi-Objective Optimization Algorithms","score":0.006899999920278788,"subfield":{"id":"https://openalex.org/subfields/1703","display_name":"Computational Theory and Mathematics"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.8428999781608582},{"id":"https://openalex.org/keywords/categorical-variable","display_name":"Categorical variable","score":0.7588000297546387},{"id":"https://openalex.org/keywords/reinforcement","display_name":"Reinforcement","score":0.5609999895095825},{"id":"https://openalex.org/keywords/extension","display_name":"Extension (predicate logic)","score":0.5357999801635742},{"id":"https://openalex.org/keywords/ensemble-learning","display_name":"Ensemble learning","score":0.43630000948905945},{"id":"https://openalex.org/keywords/learning-classifier-system","display_name":"Learning classifier system","score":0.3075999915599823}],"concepts":[{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.8428999781608582},{"id":"https://openalex.org/C5274069","wikidata":"https://www.wikidata.org/wiki/Q2285707","display_name":"Categorical variable","level":2,"score":0.7588000297546387},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6428999900817871},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6388000249862671},{"id":"https://openalex.org/C67203356","wikidata":"https://www.wikidata.org/wiki/Q1321905","display_name":"Reinforcement","level":2,"score":0.5609999895095825},{"id":"https://openalex.org/C2778029271","wikidata":"https://www.wikidata.org/wiki/Q5421931","display_name":"Extension (predicate logic)","level":2,"score":0.5357999801635742},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.5216000080108643},{"id":"https://openalex.org/C45942800","wikidata":"https://www.wikidata.org/wiki/Q245652","display_name":"Ensemble learning","level":2,"score":0.43630000948905945},{"id":"https://openalex.org/C199190896","wikidata":"https://www.wikidata.org/wiki/Q3509276","display_name":"Learning classifier system","level":3,"score":0.3075999915599823},{"id":"https://openalex.org/C51632099","wikidata":"https://www.wikidata.org/wiki/Q3985153","display_name":"Training set","level":2,"score":0.29840001463890076},{"id":"https://openalex.org/C77967617","wikidata":"https://www.wikidata.org/wiki/Q4677561","display_name":"Active learning (machine learning)","level":2,"score":0.2599000036716461},{"id":"https://openalex.org/C112972136","wikidata":"https://www.wikidata.org/wiki/Q7595718","display_name":"Stability (learning theory)","level":2,"score":0.2572000026702881},{"id":"https://openalex.org/C196340769","wikidata":"https://www.wikidata.org/wiki/Q7698910","display_name":"Temporal difference learning","level":3,"score":0.25189998745918274}],"mesh":[],"locations_count":5,"locations":[{"id":"doi:10.3390/a13050118","is_oa":true,"landing_page_url":"https://doi.org/10.3390/a13050118","pdf_url":"https://www.mdpi.com/1999-4893/13/5/118/pdf?version=1590054661","source":{"id":"https://openalex.org/S190629608","display_name":"Algorithms","issn_l":"1999-4893","issn":["1999-4893"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310310987","host_organization_name":"Multidisciplinary Digital Publishing Institute","host_organization_lineage":["https://openalex.org/P4310310987"],"host_organization_lineage_names":["Multidisciplinary Digital Publishing Institute"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Algorithms","raw_type":"journal-article"},{"id":"pmh:oai:arXiv.org:2003.10903","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2003.10903","pdf_url":"https://arxiv.org/pdf/2003.10903","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},{"id":"pmh:oai:DiVA.org:lnu-94230","is_oa":true,"landing_page_url":"http://urn.kb.se/resolve?urn=urn:nbn:se:lnu:diva-94230","pdf_url":null,"source":{"id":"https://openalex.org/S4306401598","display_name":"DiVA (Linnaeus University)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I223464139","host_organization_name":"Linnaeus University","host_organization_lineage":["https://openalex.org/I223464139"],"host_organization_lineage_names":[],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"Article in journal"},{"id":"pmh:oai:doaj.org/article:64ca98823fb9428f86ef171e948b7df9","is_oa":true,"landing_page_url":"https://doaj.org/article/64ca98823fb9428f86ef171e948b7df9","pdf_url":null,"source":{"id":"https://openalex.org/S112646816","display_name":"SHILAP Revista de lepidopterolog\u00eda","issn_l":"0300-5267","issn":["0300-5267","2340-4078"],"is_oa":true,"is_in_doaj":true,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"journal"},"license":"cc-by-sa","license_id":"https://openalex.org/licenses/cc-by-sa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Algorithms, Vol 13, Iss 5, p 118 (2020)","raw_type":"article"},{"id":"pmh:oai:mdpi.com:/1999-4893/13/5/118/","is_oa":true,"landing_page_url":"http://dx.doi.org/10.3390/a13050118","pdf_url":null,"source":{"id":"https://openalex.org/S4306400947","display_name":"MDPI (MDPI AG)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I4210097602","host_organization_name":"Multidisciplinary Digital Publishing Institute (Switzerland)","host_organization_lineage":["https://openalex.org/I4210097602"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Algorithms","raw_type":"Text"}],"best_oa_location":{"id":"doi:10.3390/a13050118","is_oa":true,"landing_page_url":"https://doi.org/10.3390/a13050118","pdf_url":"https://www.mdpi.com/1999-4893/13/5/118/pdf?version=1590054661","source":{"id":"https://openalex.org/S190629608","display_name":"Algorithms","issn_l":"1999-4893","issn":["1999-4893"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310310987","host_organization_name":"Multidisciplinary Digital Publishing Institute","host_organization_lineage":["https://openalex.org/P4310310987"],"host_organization_lineage_names":["Multidisciplinary Digital Publishing Institute"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Algorithms","raw_type":"journal-article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":true,"grobid_xml":true},"content_urls":{"pdf":"https://content.openalex.org/works/W3022923336.pdf","grobid_xml":"https://content.openalex.org/works/W3022923336.grobid-xml"},"referenced_works_count":18,"referenced_works":["https://openalex.org/W1581437610","https://openalex.org/W2043806097","https://openalex.org/W2044676734","https://openalex.org/W2060541277","https://openalex.org/W2145339207","https://openalex.org/W2150468603","https://openalex.org/W2296872882","https://openalex.org/W2739473244","https://openalex.org/W2765302304","https://openalex.org/W2788086877","https://openalex.org/W2905224739","https://openalex.org/W2911933223","https://openalex.org/W2914037023","https://openalex.org/W4212883601","https://openalex.org/W4244908355","https://openalex.org/W4248996458","https://openalex.org/W6681548226","https://openalex.org/W6751629939"],"related_works":[],"abstract_inverted_index":{"It":[0],"is":[1],"well":[2],"known":[3],"that":[4,59],"ensemble":[5],"methods":[6],"often":[7],"provide":[8],"enhanced":[9],"performance":[10,72],"in":[11],"reinforcement":[12,29,39],"learning.":[13],"In":[14],"this":[15,19,60],"paper,":[16],"we":[17,33],"explore":[18],"concept":[20],"further":[21],"by":[22,53],"using":[23],"group-aided":[24],"training":[25],"within":[26],"the":[27,49],"distributional":[28,42],"learning":[30,43],"paradigm.":[31],"Specifically,":[32],"propose":[34],"an":[35,54],"extension":[36],"to":[37,63],"categorical":[38],"learning,":[40,68],"where":[41],"targets":[44],"are":[45],"implicitly":[46],"based":[47],"on":[48,77],"total":[50],"information":[51],"gathered":[52],"ensemble.":[55],"We":[56],"empirically":[57],"show":[58],"may":[61],"lead":[62],"much":[64],"more":[65],"robust":[66],"initial":[67],"a":[69,78],"stronger":[70],"individual":[71],"level,":[73],"and":[74],"good":[75],"efficiency":[76],"per-sample":[79],"basis.":[80]},"counts_by_year":[{"year":2025,"cited_by_count":1},{"year":2023,"cited_by_count":1}],"updated_date":"2026-03-20T23:20:44.827607","created_date":"2020-05-13T00:00:00"}
