{"id":"https://openalex.org/W4294691491","doi":"https://doi.org/10.23919/acc53348.2022.9867288","title":"A Probabilistic Perspective on Risk-sensitive Reinforcement Learning","display_name":"A Probabilistic Perspective on Risk-sensitive Reinforcement Learning","publication_year":2022,"publication_date":"2022-06-08","ids":{"openalex":"https://openalex.org/W4294691491","doi":"https://doi.org/10.23919/acc53348.2022.9867288"},"language":"en","primary_location":{"id":"doi:10.23919/acc53348.2022.9867288","is_oa":false,"landing_page_url":"https://doi.org/10.23919/acc53348.2022.9867288","pdf_url":null,"source":{"id":"https://openalex.org/S4363607732","display_name":"2022 American Control Conference (ACC)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2022 American Control Conference (ACC)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5051837539","display_name":"Erfaun Noorani","orcid":"https://orcid.org/0000-0002-5171-122X"},"institutions":[{"id":"https://openalex.org/I66946132","display_name":"University of Maryland, College Park","ror":"https://ror.org/047s2c258","country_code":"US","type":"education","lineage":["https://openalex.org/I66946132"]},{"id":"https://openalex.org/I4210144776","display_name":"Earth System Science Interdisciplinary Center","ror":"https://ror.org/042607708","country_code":"US","type":"facility","lineage":["https://openalex.org/I4210144776","https://openalex.org/I66946132"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Erfaun Noorani","raw_affiliation_strings":["University of Maryland College Park,Institute for System Research (ISR),Department of Electrical and Computer Engineering,College Park,MD,USA","Department of Electrical and Computer Engineering, Institute for System Research (ISR), University of Maryland College Park, College Park, MD, USA"],"affiliations":[{"raw_affiliation_string":"University of Maryland College Park,Institute for System Research (ISR),Department of Electrical and Computer Engineering,College Park,MD,USA","institution_ids":["https://openalex.org/I66946132","https://openalex.org/I4210144776"]},{"raw_affiliation_string":"Department of Electrical and Computer Engineering, Institute for System Research (ISR), University of Maryland College Park, College Park, MD, USA","institution_ids":["https://openalex.org/I66946132"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5044395526","display_name":"John S. Baras","orcid":"https://orcid.org/0000-0002-4955-8561"},"institutions":[{"id":"https://openalex.org/I4210144776","display_name":"Earth System Science Interdisciplinary Center","ror":"https://ror.org/042607708","country_code":"US","type":"facility","lineage":["https://openalex.org/I4210144776","https://openalex.org/I66946132"]},{"id":"https://openalex.org/I66946132","display_name":"University of Maryland, College Park","ror":"https://ror.org/047s2c258","country_code":"US","type":"education","lineage":["https://openalex.org/I66946132"]},{"id":"https://openalex.org/I4210087176","display_name":"Clark Art Institute","ror":"https://ror.org/003x1ca72","country_code":"US","type":"archive","lineage":["https://openalex.org/I4210087176"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"John S. Baras","raw_affiliation_strings":["University of Maryland College Park,Institute for System Research (ISR),Department of Electrical and Computer Engineering,College Park,MD,USA","Clark School of Engineering","Department of Electrical and Computer Engineering, Institute for System Research (ISR), University of Maryland College Park, College Park, MD, USA"],"affiliations":[{"raw_affiliation_string":"University of Maryland College Park,Institute for System Research (ISR),Department of Electrical and Computer Engineering,College Park,MD,USA","institution_ids":["https://openalex.org/I66946132","https://openalex.org/I4210144776"]},{"raw_affiliation_string":"Clark School of Engineering","institution_ids":["https://openalex.org/I4210087176"]},{"raw_affiliation_string":"Department of Electrical and Computer Engineering, Institute for System Research (ISR), University of Maryland College Park, College Park, MD, USA","institution_ids":["https://openalex.org/I66946132"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5051837539"],"corresponding_institution_ids":["https://openalex.org/I4210144776","https://openalex.org/I66946132"],"apc_list":null,"apc_paid":null,"fwci":0.8036,"has_fulltext":false,"cited_by_count":6,"citation_normalized_percentile":{"value":0.70588235,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":94,"max":99},"biblio":{"volume":null,"issue":null,"first_page":"2697","last_page":"2702"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10315","display_name":"Decision-Making and Behavioral Economics","score":0.9779000282287598,"subfield":{"id":"https://openalex.org/subfields/1800","display_name":"General Decision Sciences"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},"topics":[{"id":"https://openalex.org/T10315","display_name":"Decision-Making and Behavioral Economics","score":0.9779000282287598,"subfield":{"id":"https://openalex.org/subfields/1800","display_name":"General Decision Sciences"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T10042","display_name":"Neural and Behavioral Psychology Studies","score":0.9625999927520752,"subfield":{"id":"https://openalex.org/subfields/2805","display_name":"Cognitive Neuroscience"},"field":{"id":"https://openalex.org/fields/28","display_name":"Neuroscience"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},{"id":"https://openalex.org/T10804","display_name":"Health Systems, Economic Evaluations, Quality of Life","score":0.957099974155426,"subfield":{"id":"https://openalex.org/subfields/2002","display_name":"Economics and Econometrics"},"field":{"id":"https://openalex.org/fields/20","display_name":"Economics, Econometrics and Finance"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/probabilistic-logic","display_name":"Probabilistic logic","score":0.6272254586219788},{"id":"https://openalex.org/keywords/maximization","display_name":"Maximization","score":0.6114288568496704},{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.5948565006256104},{"id":"https://openalex.org/keywords/upper-and-lower-bounds","display_name":"Upper and lower bounds","score":0.5636162757873535},{"id":"https://openalex.org/keywords/entropy-maximization","display_name":"Entropy maximization","score":0.49059024453163147},{"id":"https://openalex.org/keywords/exponential-function","display_name":"Exponential function","score":0.4891825020313263},{"id":"https://openalex.org/keywords/entropy","display_name":"Entropy (arrow of time)","score":0.4871639907360077},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.47792989015579224},{"id":"https://openalex.org/keywords/robustness","display_name":"Robustness (evolution)","score":0.4695596992969513},{"id":"https://openalex.org/keywords/principle-of-maximum-entropy","display_name":"Principle of maximum entropy","score":0.45489946007728577},{"id":"https://openalex.org/keywords/mathematical-optimization","display_name":"Mathematical optimization","score":0.4515874683856964},{"id":"https://openalex.org/keywords/exponential-utility","display_name":"Exponential utility","score":0.4131612479686737},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.3726673126220703},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.3204286992549896}],"concepts":[{"id":"https://openalex.org/C49937458","wikidata":"https://www.wikidata.org/wiki/Q2599292","display_name":"Probabilistic logic","level":2,"score":0.6272254586219788},{"id":"https://openalex.org/C2776330181","wikidata":"https://www.wikidata.org/wiki/Q18358244","display_name":"Maximization","level":2,"score":0.6114288568496704},{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.5948565006256104},{"id":"https://openalex.org/C77553402","wikidata":"https://www.wikidata.org/wiki/Q13222579","display_name":"Upper and lower bounds","level":2,"score":0.5636162757873535},{"id":"https://openalex.org/C127233936","wikidata":"https://www.wikidata.org/wiki/Q1417473","display_name":"Entropy maximization","level":3,"score":0.49059024453163147},{"id":"https://openalex.org/C151376022","wikidata":"https://www.wikidata.org/wiki/Q168698","display_name":"Exponential function","level":2,"score":0.4891825020313263},{"id":"https://openalex.org/C106301342","wikidata":"https://www.wikidata.org/wiki/Q4117933","display_name":"Entropy (arrow of time)","level":2,"score":0.4871639907360077},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.47792989015579224},{"id":"https://openalex.org/C63479239","wikidata":"https://www.wikidata.org/wiki/Q7353546","display_name":"Robustness (evolution)","level":3,"score":0.4695596992969513},{"id":"https://openalex.org/C9679016","wikidata":"https://www.wikidata.org/wiki/Q1417473","display_name":"Principle of maximum entropy","level":2,"score":0.45489946007728577},{"id":"https://openalex.org/C126255220","wikidata":"https://www.wikidata.org/wiki/Q141495","display_name":"Mathematical optimization","level":1,"score":0.4515874683856964},{"id":"https://openalex.org/C2778475510","wikidata":"https://www.wikidata.org/wiki/Q5421533","display_name":"Exponential utility","level":2,"score":0.4131612479686737},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.3726673126220703},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.3204286992549896},{"id":"https://openalex.org/C134306372","wikidata":"https://www.wikidata.org/wiki/Q7754","display_name":"Mathematical analysis","level":1,"score":0.0},{"id":"https://openalex.org/C104317684","wikidata":"https://www.wikidata.org/wiki/Q7187","display_name":"Gene","level":2,"score":0.0},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C185592680","wikidata":"https://www.wikidata.org/wiki/Q2329","display_name":"Chemistry","level":0,"score":0.0},{"id":"https://openalex.org/C55493867","wikidata":"https://www.wikidata.org/wiki/Q7094","display_name":"Biochemistry","level":1,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.23919/acc53348.2022.9867288","is_oa":false,"landing_page_url":"https://doi.org/10.23919/acc53348.2022.9867288","pdf_url":null,"source":{"id":"https://openalex.org/S4363607732","display_name":"2022 American Control Conference (ACC)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2022 American Control Conference (ACC)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[{"id":"https://openalex.org/F4320316514","display_name":"Arm","ror":"https://ror.org/04mmhzs81"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":31,"referenced_works":["https://openalex.org/W51049863","https://openalex.org/W112736823","https://openalex.org/W1511986666","https://openalex.org/W1515828703","https://openalex.org/W1968793942","https://openalex.org/W1986478300","https://openalex.org/W1993411524","https://openalex.org/W2013050310","https://openalex.org/W2041946752","https://openalex.org/W2075868924","https://openalex.org/W2096035449","https://openalex.org/W2098774185","https://openalex.org/W2119567691","https://openalex.org/W2120340025","https://openalex.org/W2127082526","https://openalex.org/W2162849300","https://openalex.org/W2310516540","https://openalex.org/W2781726626","https://openalex.org/W2799151646","https://openalex.org/W2887443568","https://openalex.org/W2949207039","https://openalex.org/W2949561945","https://openalex.org/W2978644431","https://openalex.org/W3003521509","https://openalex.org/W4214717370","https://openalex.org/W6604547463","https://openalex.org/W6674884181","https://openalex.org/W6683517036","https://openalex.org/W6734517396","https://openalex.org/W6747473740","https://openalex.org/W6753994356"],"related_works":["https://openalex.org/W2251843845","https://openalex.org/W3102188974","https://openalex.org/W2020801733","https://openalex.org/W2350826920","https://openalex.org/W2060670312","https://openalex.org/W2143074727","https://openalex.org/W2171665021","https://openalex.org/W4210988090","https://openalex.org/W2896838333","https://openalex.org/W1497541798"],"abstract_inverted_index":{"Robustness":[0],"is":[1,109,138,176,207],"a":[2,28,61,88,142,180,246,254],"key":[3],"enabler":[4],"of":[5,8,15,30,41,53,64,104,115,131,153,170,186,227,245,256,264],"real-world":[6],"applications":[7],"Reinforcement":[9,25,80,173],"Learning":[10,26,81,174],"(RL).":[11],"The":[12,243],"robustness":[13],"properties":[14],"risk-sensitive":[16,24,31,38,67,106,228],"controllers":[17],"have":[18],"long":[19],"been":[20],"established.":[21],"We":[22,59,126],"investigate":[23],"(as":[27],"generalization":[29],"stochastic":[32],"control),":[33],"by":[34],"theoretically":[35],"analyzing":[36],"the":[37,42,47,51,66,70,77,100,105,113,129,132,146,151,168,171,184,198,203,212,217,225],"exponential":[39,107,229,251],"(exponential":[40],"total":[43],"reward)":[44],"criteria,":[45,252],"and":[46,49,75,83,209,231,262],"benefits":[48,226],"improvements":[50],"introduction":[52],"risk-sensitivity":[54],"brings":[55],"to":[56,111,140,178,202,216],"conventional":[57],"RL.":[58],"provide":[60],"probabilistic":[62,89],"interpretation":[63],"(I)":[65],"exponential,":[68],"(II)":[69],"risk-neutral":[71,134],"expected":[72,135,218],"cumulative":[73,136,219],"reward,":[74],"(III)":[76],"maximum":[78,204,241],"entropy":[79,205],"objectives,":[82,238],"explore":[84],"their":[85],"connections":[86],"from":[87],"perspective.":[90],"Using":[91],"Probabilistic":[92],"Graphical":[93],"Models":[94],"(PGM),":[95],"we":[96,165],"establish":[97,224],"that":[98,128,167],"in":[99],"RL":[101],"setting,":[102],"maximization":[103,130,169],"criteria":[108],"equivalent":[110,139,177],"maximizing":[112,141,179],"probability":[114,152,185],"taking":[116,154,187],"an":[117,124,155,162,188,195],"optimal":[118,156,189],"action":[119,157,190],"at":[120,158,191],"all":[121,159,192],"time-steps":[122,160,193],"during":[123,161,194],"episode.":[125,163],"show":[127,166],"standard":[133],"return":[137,220],"lower":[143,148,181,199,213],"bound,":[144],"particularly":[145],"Evidence":[147],"Bound,":[149],"on":[150,183,234],"Furthermore,":[164],"maximum-entropy":[172],"objective":[175,206,230],"bound":[182,200,214],"episode,":[196],"where":[197],"corresponding":[201,215],"tighter":[208],"smoother":[210],"than":[211],"objective.":[221],"These":[222],"equivalences":[223],"shed":[232],"lights":[233],"previously":[235],"postulated":[236],"regularized":[237],"such":[239],"as":[240],"entropy.":[242],"utilization":[244],"PGM":[247],"model,":[248],"coupled":[249],"with":[250],"offers":[253],"number":[255],"advantages":[257],"(e.g.":[258],"facilitate":[259],"theoretical":[260],"analysis":[261],"derivation":[263],"bounds).":[265]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2024,"cited_by_count":3},{"year":2023,"cited_by_count":2}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
