{"id":"https://openalex.org/W4382981485","doi":"https://doi.org/10.3390/a16070325","title":"Risk-Sensitive Policy with Distributional Reinforcement Learning","display_name":"Risk-Sensitive Policy with Distributional Reinforcement Learning","publication_year":2023,"publication_date":"2023-06-30","ids":{"openalex":"https://openalex.org/W4382981485","doi":"https://doi.org/10.3390/a16070325"},"language":"en","primary_location":{"id":"doi:10.3390/a16070325","is_oa":true,"landing_page_url":"https://doi.org/10.3390/a16070325","pdf_url":"https://www.mdpi.com/1999-4893/16/7/325/pdf?version=1688114946","source":{"id":"https://openalex.org/S190629608","display_name":"Algorithms","issn_l":"1999-4893","issn":["1999-4893"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310310987","host_organization_name":"Multidisciplinary Digital Publishing Institute","host_organization_lineage":["https://openalex.org/P4310310987"],"host_organization_lineage_names":["Multidisciplinary Digital Publishing Institute"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Algorithms","raw_type":"journal-article"},"type":"article","indexed_in":["crossref","doaj"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://www.mdpi.com/1999-4893/16/7/325/pdf?version=1688114946","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5034891368","display_name":"Thibaut Th\u00e9ate","orcid":"https://orcid.org/0000-0001-8218-5309"},"institutions":[{"id":"https://openalex.org/I157674565","display_name":"University of Li\u00e8ge","ror":"https://ror.org/00afp2z80","country_code":"BE","type":"education","lineage":["https://openalex.org/I157674565"]}],"countries":["BE"],"is_corresponding":true,"raw_author_name":"Thibaut Th\u00e9ate","raw_affiliation_strings":["Department of Electrical Engineering and Computer Science, University of Li\u00e8ge, 4031 Li\u00e8ge, Belgium"],"affiliations":[{"raw_affiliation_string":"Department of Electrical Engineering and Computer Science, University of Li\u00e8ge, 4031 Li\u00e8ge, Belgium","institution_ids":["https://openalex.org/I157674565"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5077011518","display_name":"Damien Ernst","orcid":"https://orcid.org/0000-0002-3035-8260"},"institutions":[{"id":"https://openalex.org/I157674565","display_name":"University of Li\u00e8ge","ror":"https://ror.org/00afp2z80","country_code":"BE","type":"education","lineage":["https://openalex.org/I157674565"]}],"countries":["BE"],"is_corresponding":false,"raw_author_name":"Damien Ernst","raw_affiliation_strings":["Department of Electrical Engineering and Computer Science, University of Li\u00e8ge, 4031 Li\u00e8ge, Belgium","Information Processing and Communications Laboratory, Institut Polytechnique de Paris, 91120 Paris, France"],"affiliations":[{"raw_affiliation_string":"Department of Electrical Engineering and Computer Science, University of Li\u00e8ge, 4031 Li\u00e8ge, Belgium","institution_ids":["https://openalex.org/I157674565"]},{"raw_affiliation_string":"Information Processing and Communications Laboratory, Institut Polytechnique de Paris, 91120 Paris, France","institution_ids":[]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5034891368"],"corresponding_institution_ids":["https://openalex.org/I157674565"],"apc_list":{"value":1400,"currency":"CHF","value_usd":1515},"apc_paid":{"value":1730,"currency":"EUR","value_usd":1865},"fwci":1.9165,"has_fulltext":true,"cited_by_count":11,"citation_normalized_percentile":{"value":0.88525021,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":94,"max":100},"biblio":{"volume":"16","issue":"7","first_page":"325","last_page":"325"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9976999759674072,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9976999759674072,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10328","display_name":"Supply Chain and Inventory Management","score":0.9937000274658203,"subfield":{"id":"https://openalex.org/subfields/1404","display_name":"Management Information Systems"},"field":{"id":"https://openalex.org/fields/14","display_name":"Business, Management and Accounting"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T11182","display_name":"Auction Theory and Applications","score":0.9674000144004822,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/interpretability","display_name":"Interpretability","score":0.8891841769218445},{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.8251739740371704},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6650140285491943},{"id":"https://openalex.org/keywords/expected-utility-hypothesis","display_name":"Expected utility hypothesis","score":0.4880286753177643},{"id":"https://openalex.org/keywords/core","display_name":"Core (optical fiber)","score":0.4727048873901367},{"id":"https://openalex.org/keywords/expected-return","display_name":"Expected return","score":0.46425846219062805},{"id":"https://openalex.org/keywords/mathematical-optimization","display_name":"Mathematical optimization","score":0.40626075863838196},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.39396050572395325},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.33879417181015015},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.1742061972618103},{"id":"https://openalex.org/keywords/mathematical-economics","display_name":"Mathematical economics","score":0.1704760491847992},{"id":"https://openalex.org/keywords/economics","display_name":"Economics","score":0.15717360377311707}],"concepts":[{"id":"https://openalex.org/C2781067378","wikidata":"https://www.wikidata.org/wiki/Q17027399","display_name":"Interpretability","level":2,"score":0.8891841769218445},{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.8251739740371704},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6650140285491943},{"id":"https://openalex.org/C205706631","wikidata":"https://www.wikidata.org/wiki/Q2319304","display_name":"Expected utility hypothesis","level":2,"score":0.4880286753177643},{"id":"https://openalex.org/C2164484","wikidata":"https://www.wikidata.org/wiki/Q5170150","display_name":"Core (optical fiber)","level":2,"score":0.4727048873901367},{"id":"https://openalex.org/C154611145","wikidata":"https://www.wikidata.org/wiki/Q4331879","display_name":"Expected return","level":3,"score":0.46425846219062805},{"id":"https://openalex.org/C126255220","wikidata":"https://www.wikidata.org/wiki/Q141495","display_name":"Mathematical optimization","level":1,"score":0.40626075863838196},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.39396050572395325},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.33879417181015015},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.1742061972618103},{"id":"https://openalex.org/C144237770","wikidata":"https://www.wikidata.org/wiki/Q747534","display_name":"Mathematical economics","level":1,"score":0.1704760491847992},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.15717360377311707},{"id":"https://openalex.org/C76155785","wikidata":"https://www.wikidata.org/wiki/Q418","display_name":"Telecommunications","level":1,"score":0.0},{"id":"https://openalex.org/C106159729","wikidata":"https://www.wikidata.org/wiki/Q2294553","display_name":"Financial economics","level":1,"score":0.0},{"id":"https://openalex.org/C2780821815","wikidata":"https://www.wikidata.org/wiki/Q5340806","display_name":"Portfolio","level":2,"score":0.0}],"mesh":[],"locations_count":4,"locations":[{"id":"doi:10.3390/a16070325","is_oa":true,"landing_page_url":"https://doi.org/10.3390/a16070325","pdf_url":"https://www.mdpi.com/1999-4893/16/7/325/pdf?version=1688114946","source":{"id":"https://openalex.org/S190629608","display_name":"Algorithms","issn_l":"1999-4893","issn":["1999-4893"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310310987","host_organization_name":"Multidisciplinary Digital Publishing Institute","host_organization_lineage":["https://openalex.org/P4310310987"],"host_organization_lineage_names":["Multidisciplinary Digital Publishing Institute"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Algorithms","raw_type":"journal-article"},{"id":"pmh:oai:orbi.ulg.ac.be:2268/297883","is_oa":true,"landing_page_url":"https://orbi.uliege.be/handle/2268/297883","pdf_url":"https://orbi.uliege.be/bitstream/2268/297883/1/algorithms-16-00325.pdf","source":{"id":"https://openalex.org/S4306400651","display_name":"Open Repository and Bibliography (University of Li\u00e8ge)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I157674565","host_organization_name":"University of Li\u00e8ge","host_organization_lineage":["https://openalex.org/I157674565"],"host_organization_lineage_names":[],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Algorithms, 16 (325), 16 (2023-06-30)","raw_type":"peer reviewed"},{"id":"pmh:oai:doaj.org/article:ff42163ea0b7452780a96a86130457be","is_oa":true,"landing_page_url":"https://doaj.org/article/ff42163ea0b7452780a96a86130457be","pdf_url":null,"source":{"id":"https://openalex.org/S112646816","display_name":"SHILAP Revista de lepidopterolog\u00eda","issn_l":"0300-5267","issn":["0300-5267","2340-4078"],"is_oa":true,"is_in_doaj":true,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Algorithms, Vol 16, Iss 7, p 325 (2023)","raw_type":"article"},{"id":"pmh:oai:mdpi.com:/1999-4893/16/7/325/","is_oa":true,"landing_page_url":"https://dx.doi.org/10.3390/a16070325","pdf_url":null,"source":{"id":"https://openalex.org/S4306400947","display_name":"MDPI (MDPI AG)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I4210097602","host_organization_name":"Multidisciplinary Digital Publishing Institute (Switzerland)","host_organization_lineage":["https://openalex.org/I4210097602"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Algorithms; Volume 16; Issue 7; Pages: 325","raw_type":"Text"}],"best_oa_location":{"id":"doi:10.3390/a16070325","is_oa":true,"landing_page_url":"https://doi.org/10.3390/a16070325","pdf_url":"https://www.mdpi.com/1999-4893/16/7/325/pdf?version=1688114946","source":{"id":"https://openalex.org/S190629608","display_name":"Algorithms","issn_l":"1999-4893","issn":["1999-4893"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310310987","host_organization_name":"Multidisciplinary Digital Publishing Institute","host_organization_lineage":["https://openalex.org/P4310310987"],"host_organization_lineage_names":["Multidisciplinary Digital Publishing Institute"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Algorithms","raw_type":"journal-article"},"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/16","score":0.7900000214576721,"display_name":"Peace, Justice and strong institutions"}],"awards":[],"funders":[{"id":"https://openalex.org/F4320321390","display_name":"Fonds De La Recherche Scientifique - FNRS","ror":"https://ror.org/03q83t159"}],"has_content":{"pdf":true,"grobid_xml":false},"content_urls":{"pdf":"https://content.openalex.org/works/W4382981485.pdf"},"referenced_works_count":28,"referenced_works":["https://openalex.org/W32403112","https://openalex.org/W1845972764","https://openalex.org/W2169206416","https://openalex.org/W2529477964","https://openalex.org/W2739473244","https://openalex.org/W2803308811","https://openalex.org/W2904730732","https://openalex.org/W2945619782","https://openalex.org/W2949963197","https://openalex.org/W2963590277","https://openalex.org/W2964108826","https://openalex.org/W3026652478","https://openalex.org/W3121045039","https://openalex.org/W3121342653","https://openalex.org/W3123216837","https://openalex.org/W3124407081","https://openalex.org/W3126577088","https://openalex.org/W3130263801","https://openalex.org/W3150718622","https://openalex.org/W3162902207","https://openalex.org/W3173552545","https://openalex.org/W4283396661","https://openalex.org/W6680547686","https://openalex.org/W6685027638","https://openalex.org/W6687063787","https://openalex.org/W6735677848","https://openalex.org/W6766581138","https://openalex.org/W6770012831"],"related_works":["https://openalex.org/W2905433371","https://openalex.org/W4390569940","https://openalex.org/W2888392564","https://openalex.org/W4361193272","https://openalex.org/W4310278675","https://openalex.org/W2765155366","https://openalex.org/W3030933983","https://openalex.org/W2606882695","https://openalex.org/W2067797424","https://openalex.org/W2094531960"],"abstract_inverted_index":{"Classical":[0],"reinforcement":[1],"learning":[2,99,172],"(RL)":[3],"techniques":[4],"are":[5,67],"generally":[6,93],"concerned":[7],"with":[8,34,175,183],"the":[9,16,19,30,35,49,70,72,77,80,90,96,110,114,117,127,141,144,179,187,190],"design":[10],"of":[11,18,79,98,143,189],"decision-making":[12,64,192],"policies":[13,65,174],"driven":[14],"by":[15,76,103,134],"maximisation":[17],"expected":[20,111,152],"outcome.":[21],"Nevertheless,":[22],"this":[23,162],"approach":[24],"does":[25],"not":[26],"take":[27],"into":[28,107],"consideration":[29],"potential":[31,146],"risk":[32,149],"associated":[33],"actions":[36],"taken,":[37],"which":[38],"may":[39],"be":[40,124],"critical":[41],"in":[42,101,155],"certain":[43],"applications.":[44],"To":[45],"address":[46],"that":[47,66],"issue,":[48],"present":[50],"research":[51,163],"work":[52],"introduces":[53],"a":[54,165],"novel":[55],"methodology":[56],"based":[57],"on":[58,186],"distributional":[59,136,180],"RL":[60,102,137,181],"to":[61,69,88,157,178],"derive":[62],"sequential":[63],"sensitive":[68],"risk,":[71],"latter":[73],"being":[74],"modelled":[75],"tail":[78],"return":[81,112,129,153],"probability":[82],"distribution.":[83],"The":[84],"core":[85,97],"idea":[86],"is":[87],"replace":[89],"Q":[91],"function":[92,120],"standing":[94],"at":[95],"schemes":[100],"another":[104],"function,":[105],"taking":[106],"account":[108],"both":[109],"and":[113,151,168],"risk.":[115],"Named":[116],"risk-based":[118],"utility":[119],"U,":[121],"it":[122],"can":[123],"extracted":[125],"from":[126],"random":[128],"distribution":[130],"Z":[131],"naturally":[132],"learnt":[133],"any":[135],"algorithm.":[138],"This":[139],"enables":[140],"spanning":[142],"complete":[145],"trade-off":[147],"between":[148],"minimisation":[150],"maximisation,":[154],"contrast":[156],"fully":[158],"risk-averse":[159],"methodologies.":[160],"Fundamentally,":[161],"yields":[164],"truly":[166],"practical":[167],"accessible":[169],"solution":[170],"for":[171],"risk-sensitive":[173],"minimal":[176],"modification":[177],"algorithm,":[182],"an":[184],"emphasis":[185],"interpretability":[188],"resulting":[191],"process.":[193]},"counts_by_year":[{"year":2026,"cited_by_count":2},{"year":2025,"cited_by_count":3},{"year":2024,"cited_by_count":4},{"year":2023,"cited_by_count":2}],"updated_date":"2026-02-28T09:26:25.869077","created_date":"2025-10-10T00:00:00"}
