{"id":"https://openalex.org/W4392114294","doi":"https://doi.org/10.1109/tnnls.2023.3331304","title":"CVaR-Constrained Policy Optimization for Safe Reinforcement Learning","display_name":"CVaR-Constrained Policy Optimization for Safe Reinforcement Learning","publication_year":2024,"publication_date":"2024-02-23","ids":{"openalex":"https://openalex.org/W4392114294","doi":"https://doi.org/10.1109/tnnls.2023.3331304","pmid":"https://pubmed.ncbi.nlm.nih.gov/38393836"},"language":"en","primary_location":{"id":"doi:10.1109/tnnls.2023.3331304","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tnnls.2023.3331304","pdf_url":null,"source":{"id":"https://openalex.org/S4210175523","display_name":"IEEE Transactions on Neural Networks and Learning Systems","issn_l":"2162-237X","issn":["2162-237X","2162-2388"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Neural Networks and Learning Systems","raw_type":"journal-article"},"type":"article","indexed_in":["crossref","pubmed"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5100739520","display_name":"Qiyuan Zhang","orcid":"https://orcid.org/0000-0002-8519-4259"},"institutions":[{"id":"https://openalex.org/I204983213","display_name":"Harbin Institute of Technology","ror":"https://ror.org/01yqg2h08","country_code":"CN","type":"education","lineage":["https://openalex.org/I204983213"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Qiyuan Zhang","raw_affiliation_strings":["School of Mechatronics Engineering, Harbin Institute of Technology, Harbin, China"],"raw_orcid":"https://orcid.org/0000-0002-8519-4259","affiliations":[{"raw_affiliation_string":"School of Mechatronics Engineering, Harbin Institute of Technology, Harbin, China","institution_ids":["https://openalex.org/I204983213"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5010742490","display_name":"Shu Leng","orcid":"https://orcid.org/0000-0002-0562-2250"},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Shu Leng","raw_affiliation_strings":["Department of Automation, Tsinghua University, Beijing, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Department of Automation, Tsinghua University, Beijing, China","institution_ids":["https://openalex.org/I99065089"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5027912723","display_name":"Xiaoteng Ma","orcid":"https://orcid.org/0000-0002-7250-6268"},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xiaoteng Ma","raw_affiliation_strings":["Department of Automation, Tsinghua University, Beijing, China"],"raw_orcid":"https://orcid.org/0000-0002-7250-6268","affiliations":[{"raw_affiliation_string":"Department of Automation, Tsinghua University, Beijing, China","institution_ids":["https://openalex.org/I99065089"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5056755960","display_name":"Qihan Liu","orcid":null},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Qihan Liu","raw_affiliation_strings":["Department of Automation, Tsinghua University, Beijing, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Department of Automation, Tsinghua University, Beijing, China","institution_ids":["https://openalex.org/I99065089"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100737125","display_name":"Xueqian Wang","orcid":"https://orcid.org/0000-0003-3542-0593"},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xueqian Wang","raw_affiliation_strings":["Department of Automation, Tsinghua University, Beijing, China"],"raw_orcid":"https://orcid.org/0000-0003-3542-0593","affiliations":[{"raw_affiliation_string":"Department of Automation, Tsinghua University, Beijing, China","institution_ids":["https://openalex.org/I99065089"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100644629","display_name":"Bin Liang","orcid":"https://orcid.org/0000-0002-7163-345X"},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Bin Liang","raw_affiliation_strings":["Department of Automation, Tsinghua University, Beijing, China"],"raw_orcid":"https://orcid.org/0000-0002-7163-345X","affiliations":[{"raw_affiliation_string":"Department of Automation, Tsinghua University, Beijing, China","institution_ids":["https://openalex.org/I99065089"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100345672","display_name":"Yu Liu","orcid":"https://orcid.org/0000-0001-9107-6390"},"institutions":[{"id":"https://openalex.org/I204983213","display_name":"Harbin Institute of Technology","ror":"https://ror.org/01yqg2h08","country_code":"CN","type":"education","lineage":["https://openalex.org/I204983213"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yu Liu","raw_affiliation_strings":["School of Mechatronics Engineering, Harbin Institute of Technology, Harbin, China"],"raw_orcid":"https://orcid.org/0000-0001-9107-6390","affiliations":[{"raw_affiliation_string":"School of Mechatronics Engineering, Harbin Institute of Technology, Harbin, China","institution_ids":["https://openalex.org/I204983213"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5101912623","display_name":"Jun Yang","orcid":"https://orcid.org/0000-0002-9386-5825"},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jun Yang","raw_affiliation_strings":["Department of Automation, Tsinghua University, Beijing, China"],"raw_orcid":"https://orcid.org/0000-0002-9386-5825","affiliations":[{"raw_affiliation_string":"Department of Automation, Tsinghua University, Beijing, China","institution_ids":["https://openalex.org/I99065089"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":8,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":9.4695,"has_fulltext":false,"cited_by_count":31,"citation_normalized_percentile":{"value":0.98308146,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":96,"max":100},"biblio":{"volume":"36","issue":"1","first_page":"830","last_page":"841"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9611999988555908,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9611999988555908,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10142","display_name":"Formal Methods in Verification","score":0.9236999750137329,"subfield":{"id":"https://openalex.org/subfields/1703","display_name":"Computational Theory and Mathematics"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/cvar","display_name":"CVAR","score":0.9932253956794739},{"id":"https://openalex.org/keywords/mathematical-optimization","display_name":"Mathematical optimization","score":0.6888943314552307},{"id":"https://openalex.org/keywords/constraint","display_name":"Constraint (computer-aided design)","score":0.6060338020324707},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.5243499279022217},{"id":"https://openalex.org/keywords/quantile","display_name":"Quantile","score":0.47127148509025574},{"id":"https://openalex.org/keywords/expected-shortfall","display_name":"Expected shortfall","score":0.46661487221717834},{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.44742751121520996},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.24723482131958008},{"id":"https://openalex.org/keywords/risk-management","display_name":"Risk management","score":0.14122885465621948},{"id":"https://openalex.org/keywords/economics","display_name":"Economics","score":0.12588319182395935},{"id":"https://openalex.org/keywords/econometrics","display_name":"Econometrics","score":0.11871704459190369},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.07676059007644653}],"concepts":[{"id":"https://openalex.org/C2779922397","wikidata":"https://www.wikidata.org/wiki/Q5014755","display_name":"CVAR","level":4,"score":0.9932253956794739},{"id":"https://openalex.org/C126255220","wikidata":"https://www.wikidata.org/wiki/Q141495","display_name":"Mathematical optimization","level":1,"score":0.6888943314552307},{"id":"https://openalex.org/C2776036281","wikidata":"https://www.wikidata.org/wiki/Q48769818","display_name":"Constraint (computer-aided design)","level":2,"score":0.6060338020324707},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.5243499279022217},{"id":"https://openalex.org/C118671147","wikidata":"https://www.wikidata.org/wiki/Q578714","display_name":"Quantile","level":2,"score":0.47127148509025574},{"id":"https://openalex.org/C5496284","wikidata":"https://www.wikidata.org/wiki/Q5420856","display_name":"Expected shortfall","level":3,"score":0.46661487221717834},{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.44742751121520996},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.24723482131958008},{"id":"https://openalex.org/C32896092","wikidata":"https://www.wikidata.org/wiki/Q189447","display_name":"Risk management","level":2,"score":0.14122885465621948},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.12588319182395935},{"id":"https://openalex.org/C149782125","wikidata":"https://www.wikidata.org/wiki/Q160039","display_name":"Econometrics","level":1,"score":0.11871704459190369},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.07676059007644653},{"id":"https://openalex.org/C187736073","wikidata":"https://www.wikidata.org/wiki/Q2920921","display_name":"Management","level":1,"score":0.0},{"id":"https://openalex.org/C2524010","wikidata":"https://www.wikidata.org/wiki/Q8087","display_name":"Geometry","level":1,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1109/tnnls.2023.3331304","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tnnls.2023.3331304","pdf_url":null,"source":{"id":"https://openalex.org/S4210175523","display_name":"IEEE Transactions on Neural Networks and Learning Systems","issn_l":"2162-237X","issn":["2162-237X","2162-2388"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Neural Networks and Learning Systems","raw_type":"journal-article"},{"id":"pmid:38393836","is_oa":false,"landing_page_url":"https://pubmed.ncbi.nlm.nih.gov/38393836","pdf_url":null,"source":{"id":"https://openalex.org/S4306525036","display_name":"PubMed","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1299303238","host_organization_name":"National Institutes of Health","host_organization_lineage":["https://openalex.org/I1299303238"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE transactions on neural networks and learning systems","raw_type":null}],"best_oa_location":null,"sustainable_development_goals":[{"display_name":"Reduced inequalities","id":"https://metadata.un.org/sdg/10","score":0.4099999964237213}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":48,"referenced_works":["https://openalex.org/W1647779468","https://openalex.org/W2013406658","https://openalex.org/W2019291268","https://openalex.org/W2027106436","https://openalex.org/W2128347943","https://openalex.org/W2128723253","https://openalex.org/W2130990842","https://openalex.org/W2139137304","https://openalex.org/W2145339207","https://openalex.org/W2515894276","https://openalex.org/W2561666900","https://openalex.org/W2586823359","https://openalex.org/W2765302304","https://openalex.org/W2977518448","https://openalex.org/W2998619042","https://openalex.org/W3021487874","https://openalex.org/W3131738409","https://openalex.org/W3176452384","https://openalex.org/W3177161388","https://openalex.org/W3195860693","https://openalex.org/W3195968524","https://openalex.org/W3216656735","https://openalex.org/W4206497039","https://openalex.org/W4206663383","https://openalex.org/W4221110788","https://openalex.org/W4285076891","https://openalex.org/W4285606945","https://openalex.org/W4353056919","https://openalex.org/W4362722548","https://openalex.org/W6617021176","https://openalex.org/W6627932998","https://openalex.org/W6634413486","https://openalex.org/W6638018090","https://openalex.org/W6675999342","https://openalex.org/W6683300800","https://openalex.org/W6687063787","https://openalex.org/W6737893269","https://openalex.org/W6747473740","https://openalex.org/W6747924173","https://openalex.org/W6751629939","https://openalex.org/W6768617876","https://openalex.org/W6771280675","https://openalex.org/W6780587392","https://openalex.org/W6784643869","https://openalex.org/W6785187516","https://openalex.org/W6786019710","https://openalex.org/W6801967366","https://openalex.org/W6838377450"],"related_works":["https://openalex.org/W4316167276","https://openalex.org/W2002736235","https://openalex.org/W2232143283","https://openalex.org/W1647779468","https://openalex.org/W48101171","https://openalex.org/W2008510020","https://openalex.org/W2096554740","https://openalex.org/W2087892965","https://openalex.org/W2989754218","https://openalex.org/W3124407081"],"abstract_inverted_index":{"Current":[0],"constrained":[1,35,136,163],"reinforcement":[2],"learning":[3],"(RL)":[4],"methods":[5],"guarantee":[6],"constraint":[7,21,58,90,146],"satisfaction":[8,42],"only":[9],"in":[10,23,109],"expectation,":[11],"which":[12],"is":[13,43],"inadequate":[14],"for":[15,45],"safety-critical":[16],"decision":[17],"problems.":[18],"Since":[19],"a":[20,26,57],"satisfied":[22],"expectation":[24],"remains":[25],"high":[27,39],"probability":[28],"of":[29,41,64,89,145,154],"exceeding":[30],"the":[31,53,60,69,77,86,94,97,106,114,119,125,131,135,143,155,169],"cost":[32],"threshold,":[33],"solving":[34],"RL":[36,46,186],"problems":[37],"with":[38],"probabilities":[40],"critical":[44],"safety.":[47],"In":[48,139],"this":[49],"work,":[50],"we":[51,103],"consider":[52],"safety":[54,177],"criterion":[55],"as":[56],"on":[59,96,162],"conditional":[61],"value-at-risk":[62],"(CVaR)":[63],"cumulative":[65],"costs,":[66],"and":[67,118,179],"propose":[68],"CVaR-constrained":[70,107],"policy":[71,128],"optimization":[72,137],"algorithm":[73],"(CVaR-CPO)":[74],"to":[75,85,93,134,148,166,183],"maximize":[76],"expected":[78],"return":[79],"while":[80],"ensuring":[81],"agents":[82],"pay":[83],"attention":[84],"upper":[87],"tail":[88],"costs.":[91],"According":[92],"bound":[95],"CVaR-related":[98,156],"performance":[99,182],"between":[100],"two":[101],"policies,":[102],"first":[104],"reformulate":[105],"problem":[108],"augmented":[110],"state":[111,115],"space":[112],"using":[113],"extension":[116],"procedure":[117],"trust-region":[120],"method.":[121],"CVaR-CPO":[122,141],"then":[123],"derives":[124],"optimal":[126],"update":[127],"by":[129],"applying":[130],"Lagrangian":[132],"method":[133,171],"problem.":[138],"addition,":[140],"utilizes":[142],"distribution":[144],"costs":[147],"provide":[149],"an":[150],"efficient":[151],"quantile-based":[152],"estimation":[153],"value":[157],"function.":[158],"We":[159],"conduct":[160],"experiments":[161],"control":[164],"tasks":[165],"show":[167],"that":[168,175],"proposed":[170],"can":[172],"produce":[173],"behaviors":[174],"satisfy":[176],"constraints,":[178],"achieve":[180],"comparable":[181],"most":[184],"safe":[185],"(SRL)":[187],"methods.":[188]},"counts_by_year":[{"year":2026,"cited_by_count":7},{"year":2025,"cited_by_count":21},{"year":2024,"cited_by_count":3}],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2025-10-10T00:00:00"}
