{"id":"https://openalex.org/W4402402057","doi":"https://doi.org/10.1109/tpami.2024.3457538","title":"A Review of Safe Reinforcement Learning: Methods, Theories, and Applications","display_name":"A Review of Safe Reinforcement Learning: Methods, Theories, and Applications","publication_year":2024,"publication_date":"2024-09-10","ids":{"openalex":"https://openalex.org/W4402402057","doi":"https://doi.org/10.1109/tpami.2024.3457538","pmid":"https://pubmed.ncbi.nlm.nih.gov/39255180"},"language":"en","primary_location":{"id":"doi:10.1109/tpami.2024.3457538","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tpami.2024.3457538","pdf_url":null,"source":{"id":"https://openalex.org/S199944782","display_name":"IEEE Transactions on Pattern Analysis and Machine Intelligence","issn_l":"0162-8828","issn":["0162-8828","1939-3539","2160-9292"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310320439","host_organization_name":"IEEE Computer Society","host_organization_lineage":["https://openalex.org/P4310320439","https://openalex.org/P4310319808"],"host_organization_lineage_names":["IEEE Computer Society","Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Pattern Analysis and Machine Intelligence","raw_type":"journal-article"},"type":"review","indexed_in":["crossref","pubmed"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://kclpure.kcl.ac.uk/portal/en/publications/ee878f9a-6f22-4df1-a47f-df4e9e2f0090","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5062677595","display_name":"Shangding Gu","orcid":"https://orcid.org/0000-0002-2722-3779"},"institutions":[{"id":"https://openalex.org/I62916508","display_name":"Technical University of Munich","ror":"https://ror.org/02kkvpp62","country_code":"DE","type":"education","lineage":["https://openalex.org/I62916508"]},{"id":"https://openalex.org/I95457486","display_name":"University of California, Berkeley","ror":"https://ror.org/01an7q238","country_code":"US","type":"education","lineage":["https://openalex.org/I95457486"]}],"countries":["DE","US"],"is_corresponding":true,"raw_author_name":"Shangding Gu","raw_affiliation_strings":["University of California, Berkeley, CA, USA","Department of Informatics, Technical University of Munich, Munich, Germany"],"affiliations":[{"raw_affiliation_string":"University of California, Berkeley, CA, USA","institution_ids":["https://openalex.org/I95457486"]},{"raw_affiliation_string":"Department of Informatics, Technical University of Munich, Munich, Germany","institution_ids":["https://openalex.org/I62916508"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5069283448","display_name":"Yang Long","orcid":"https://orcid.org/0000-0001-7600-3396"},"institutions":[{"id":"https://openalex.org/I20231570","display_name":"Peking University","ror":"https://ror.org/02v51f717","country_code":"CN","type":"education","lineage":["https://openalex.org/I20231570"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Long Yang","raw_affiliation_strings":["Institute for AI, Peking University, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Institute for AI, Peking University, Beijing, China","institution_ids":["https://openalex.org/I20231570"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5002080576","display_name":"Yali Du","orcid":"https://orcid.org/0000-0001-5683-2621"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yali Du","raw_affiliation_strings":["Department of Informatics, King&#x2019;s College London, London, U.K","Department of Informatics, King&#x0027;s College London, London, U.K"],"affiliations":[{"raw_affiliation_string":"Department of Informatics, King&#x2019;s College London, London, U.K","institution_ids":[]},{"raw_affiliation_string":"Department of Informatics, King&#x0027;s College London, London, U.K","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100323037","display_name":"Guang Chen","orcid":"https://orcid.org/0000-0002-7416-592X"},"institutions":[{"id":"https://openalex.org/I116953780","display_name":"Tongji University","ror":"https://ror.org/03rc6as71","country_code":"CN","type":"education","lineage":["https://openalex.org/I116953780"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Guang Chen","raw_affiliation_strings":["Department of Computer Science and Technology, Tongji University, Shanghai, China"],"affiliations":[{"raw_affiliation_string":"Department of Computer Science and Technology, Tongji University, Shanghai, China","institution_ids":["https://openalex.org/I116953780"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5020335733","display_name":"Florian Walter","orcid":"https://orcid.org/0000-0002-8279-7476"},"institutions":[{"id":"https://openalex.org/I62916508","display_name":"Technical University of Munich","ror":"https://ror.org/02kkvpp62","country_code":"DE","type":"education","lineage":["https://openalex.org/I62916508"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Florian Walter","raw_affiliation_strings":["Department of Informatics, Technical University of Munich, Munich, Germany"],"affiliations":[{"raw_affiliation_string":"Department of Informatics, Technical University of Munich, Munich, Germany","institution_ids":["https://openalex.org/I62916508"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100384727","display_name":"Jun Wang","orcid":"https://orcid.org/0000-0002-4021-4228"},"institutions":[{"id":"https://openalex.org/I45129253","display_name":"University College London","ror":"https://ror.org/02jx3x895","country_code":"GB","type":"education","lineage":["https://openalex.org/I124357947","https://openalex.org/I45129253"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Jun Wang","raw_affiliation_strings":["Department of Computer Science, University College London, London, U.K"],"affiliations":[{"raw_affiliation_string":"Department of Computer Science, University College London, London, U.K","institution_ids":["https://openalex.org/I45129253"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5063781430","display_name":"Alois Knoll","orcid":"https://orcid.org/0000-0003-4840-076X"},"institutions":[{"id":"https://openalex.org/I62916508","display_name":"Technical University of Munich","ror":"https://ror.org/02kkvpp62","country_code":"DE","type":"education","lineage":["https://openalex.org/I62916508"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Alois Knoll","raw_affiliation_strings":["Department of Informatics, Technical University of Munich, Munich, Germany"],"affiliations":[{"raw_affiliation_string":"Department of Informatics, Technical University of Munich, Munich, Germany","institution_ids":["https://openalex.org/I62916508"]}]}],"institutions":[],"countries_distinct_count":4,"institutions_distinct_count":7,"corresponding_author_ids":["https://openalex.org/A5062677595"],"corresponding_institution_ids":["https://openalex.org/I62916508","https://openalex.org/I95457486"],"apc_list":null,"apc_paid":null,"fwci":60.2221,"has_fulltext":false,"cited_by_count":173,"citation_normalized_percentile":{"value":1.0,"is_in_top_1_percent":true,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":96,"max":100},"biblio":{"volume":"46","issue":"12","first_page":"11216","last_page":"11235"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T13295","display_name":"Safety Systems Engineering in Autonomy","score":0.9172999858856201,"subfield":{"id":"https://openalex.org/subfields/2213","display_name":"Safety, Risk, Reliability and Quality"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T13295","display_name":"Safety Systems Engineering in Autonomy","score":0.9172999858856201,"subfield":{"id":"https://openalex.org/subfields/2213","display_name":"Safety, Risk, Reliability and Quality"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.8403766751289368},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7157692909240723},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5698098540306091},{"id":"https://openalex.org/keywords/robotics","display_name":"Robotics","score":0.45257964730262756},{"id":"https://openalex.org/keywords/open-research","display_name":"Open research","score":0.44626516103744507},{"id":"https://openalex.org/keywords/thread","display_name":"Thread (computing)","score":0.43989020586013794},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.43744540214538574},{"id":"https://openalex.org/keywords/sample-complexity","display_name":"Sample complexity","score":0.42687633633613586},{"id":"https://openalex.org/keywords/risk-analysis","display_name":"Risk analysis (engineering)","score":0.3655095398426056},{"id":"https://openalex.org/keywords/robot","display_name":"Robot","score":0.2633278965950012}],"concepts":[{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.8403766751289368},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7157692909240723},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5698098540306091},{"id":"https://openalex.org/C34413123","wikidata":"https://www.wikidata.org/wiki/Q170978","display_name":"Robotics","level":3,"score":0.45257964730262756},{"id":"https://openalex.org/C2778464652","wikidata":"https://www.wikidata.org/wiki/Q309849","display_name":"Open research","level":2,"score":0.44626516103744507},{"id":"https://openalex.org/C138101251","wikidata":"https://www.wikidata.org/wiki/Q213092","display_name":"Thread (computing)","level":2,"score":0.43989020586013794},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.43744540214538574},{"id":"https://openalex.org/C2778445095","wikidata":"https://www.wikidata.org/wiki/Q18354077","display_name":"Sample complexity","level":2,"score":0.42687633633613586},{"id":"https://openalex.org/C112930515","wikidata":"https://www.wikidata.org/wiki/Q4389547","display_name":"Risk analysis (engineering)","level":1,"score":0.3655095398426056},{"id":"https://openalex.org/C90509273","wikidata":"https://www.wikidata.org/wiki/Q11012","display_name":"Robot","level":2,"score":0.2633278965950012},{"id":"https://openalex.org/C136764020","wikidata":"https://www.wikidata.org/wiki/Q466","display_name":"World Wide Web","level":1,"score":0.0},{"id":"https://openalex.org/C71924100","wikidata":"https://www.wikidata.org/wiki/Q11190","display_name":"Medicine","level":0,"score":0.0},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0}],"mesh":[],"locations_count":4,"locations":[{"id":"doi:10.1109/tpami.2024.3457538","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tpami.2024.3457538","pdf_url":null,"source":{"id":"https://openalex.org/S199944782","display_name":"IEEE Transactions on Pattern Analysis and Machine Intelligence","issn_l":"0162-8828","issn":["0162-8828","1939-3539","2160-9292"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310320439","host_organization_name":"IEEE Computer Society","host_organization_lineage":["https://openalex.org/P4310320439","https://openalex.org/P4310319808"],"host_organization_lineage_names":["IEEE Computer Society","Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Pattern Analysis and Machine Intelligence","raw_type":"journal-article"},{"id":"pmid:39255180","is_oa":false,"landing_page_url":"https://pubmed.ncbi.nlm.nih.gov/39255180","pdf_url":null,"source":{"id":"https://openalex.org/S4306525036","display_name":"PubMed","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1299303238","host_organization_name":"National Institutes of Health","host_organization_lineage":["https://openalex.org/I1299303238"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE transactions on pattern analysis and machine intelligence","raw_type":null},{"id":"pmh:oai:kclpure.kcl.ac.uk:publications/ee878f9a-6f22-4df1-a47f-df4e9e2f0090","is_oa":true,"landing_page_url":"https://kclpure.kcl.ac.uk/portal/en/publications/ee878f9a-6f22-4df1-a47f-df4e9e2f0090","pdf_url":null,"source":{"id":"https://openalex.org/S4306400216","display_name":"Research Portal (King's College London)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I183935753","host_organization_name":"King's College London","host_organization_lineage":["https://openalex.org/I183935753"],"host_organization_lineage_names":[],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Gu, S, Yang, L, Du, Y, Chen, G, Walter, F, Wang, J & Knoll, A 2024, 'A Review of Safe Reinforcement Learning: Methods, Theories and Applications', IEEE Transactions on Pattern Analysis and Machine Intelligence (IEEE TPAMI), vol. 46, no. 12, pp. 11216-11235. https://doi.org/10.1109/TPAMI.2024.3457538","raw_type":"info:eu-repo/semantics/publishedVersion"},{"id":"pmh:oai:eprints.ucl.ac.uk.OAI2:10197913","is_oa":false,"landing_page_url":"https://discovery.ucl.ac.uk/id/eprint/10197913/","pdf_url":null,"source":{"id":"https://openalex.org/S4306400024","display_name":"UCL Discovery (University College London)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I45129253","host_organization_name":"University College London","host_organization_lineage":["https://openalex.org/I45129253"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"   IEEE Transactions on Pattern Analysis and Machine Intelligence       (2024)      ","raw_type":"Article"}],"best_oa_location":{"id":"pmh:oai:kclpure.kcl.ac.uk:publications/ee878f9a-6f22-4df1-a47f-df4e9e2f0090","is_oa":true,"landing_page_url":"https://kclpure.kcl.ac.uk/portal/en/publications/ee878f9a-6f22-4df1-a47f-df4e9e2f0090","pdf_url":null,"source":{"id":"https://openalex.org/S4306400216","display_name":"Research Portal (King's College London)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I183935753","host_organization_name":"King's College London","host_organization_lineage":["https://openalex.org/I183935753"],"host_organization_lineage_names":[],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Gu, S, Yang, L, Du, Y, Chen, G, Walter, F, Wang, J & Knoll, A 2024, 'A Review of Safe Reinforcement Learning: Methods, Theories and Applications', IEEE Transactions on Pattern Analysis and Machine Intelligence (IEEE TPAMI), vol. 46, no. 12, pp. 11216-11235. https://doi.org/10.1109/TPAMI.2024.3457538","raw_type":"info:eu-repo/semantics/publishedVersion"},"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G1605215565","display_name":null,"funder_award_id":"62372329","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":237,"referenced_works":["https://openalex.org/W32403112","https://openalex.org/W134786152","https://openalex.org/W280536263","https://openalex.org/W1497976081","https://openalex.org/W1499669280","https://openalex.org/W1504265534","https://openalex.org/W1548148540","https://openalex.org/W1565670382","https://openalex.org/W1574700590","https://openalex.org/W1593687854","https://openalex.org/W1840625103","https://openalex.org/W1845972764","https://openalex.org/W1867103660","https://openalex.org/W1964262340","https://openalex.org/W1965878388","https://openalex.org/W1977777567","https://openalex.org/W1978956894","https://openalex.org/W1982813377","https://openalex.org/W1994616650","https://openalex.org/W1997020095","https://openalex.org/W2001139238","https://openalex.org/W2006859604","https://openalex.org/W2007181101","https://openalex.org/W2013406658","https://openalex.org/W2041367235","https://openalex.org/W2053572490","https://openalex.org/W2054226461","https://openalex.org/W2054586144","https://openalex.org/W2056132907","https://openalex.org/W2058066080","https://openalex.org/W2070570138","https://openalex.org/W2073314543","https://openalex.org/W2077789482","https://openalex.org/W2088413745","https://openalex.org/W2089012547","https://openalex.org/W2089080831","https://openalex.org/W2117428849","https://openalex.org/W2120678009","https://openalex.org/W2126316555","https://openalex.org/W2129462326","https://openalex.org/W2130178506","https://openalex.org/W2131600418","https://openalex.org/W2133626316","https://openalex.org/W2134491302","https://openalex.org/W2137983211","https://openalex.org/W2139914196","https://openalex.org/W2143346970","https://openalex.org/W2156168464","https://openalex.org/W2158782408","https://openalex.org/W2162849300","https://openalex.org/W2291649624","https://openalex.org/W2483880798","https://openalex.org/W2487186542","https://openalex.org/W2566307933","https://openalex.org/W2619551236","https://openalex.org/W2740234467","https://openalex.org/W2784709112","https://openalex.org/W2787908307","https://openalex.org/W2788084076","https://openalex.org/W2892521964","https://openalex.org/W2896642734","https://openalex.org/W2903709398","https://openalex.org/W2904263972","https://openalex.org/W2909414402","https://openalex.org/W2912640545","https://openalex.org/W2939995367","https://openalex.org/W2947145479","https://openalex.org/W2948652605","https://openalex.org/W2949963197","https://openalex.org/W2953466973","https://openalex.org/W2962949934","https://openalex.org/W2963293747","https://openalex.org/W2963525569","https://openalex.org/W2964016927","https://openalex.org/W2966735560","https://openalex.org/W2968104655","https://openalex.org/W2968945909","https://openalex.org/W2968983352","https://openalex.org/W2985713881","https://openalex.org/W2989847975","https://openalex.org/W2990466689","https://openalex.org/W2991046523","https://openalex.org/W2998619042","https://openalex.org/W3000838145","https://openalex.org/W3005148879","https://openalex.org/W3008387910","https://openalex.org/W3012331565","https://openalex.org/W3035020089","https://openalex.org/W3038180127","https://openalex.org/W3040914594","https://openalex.org/W3045520927","https://openalex.org/W3045838296","https://openalex.org/W3048735518","https://openalex.org/W3087810330","https://openalex.org/W3110247328","https://openalex.org/W3115648115","https://openalex.org/W3115737827","https://openalex.org/W3121342653","https://openalex.org/W3125783951","https://openalex.org/W3129038712","https://openalex.org/W3131665514","https://openalex.org/W3135239772","https://openalex.org/W3152923668","https://openalex.org/W3154507809","https://openalex.org/W3157410348","https://openalex.org/W3159199672","https://openalex.org/W3162439934","https://openalex.org/W3163225369","https://openalex.org/W3172347396","https://openalex.org/W3179660843","https://openalex.org/W3189256393","https://openalex.org/W3189576409","https://openalex.org/W3195968524","https://openalex.org/W3198258770","https://openalex.org/W3200353301","https://openalex.org/W3203981070","https://openalex.org/W4210882709","https://openalex.org/W4211202104","https://openalex.org/W4220902452","https://openalex.org/W4220949416","https://openalex.org/W4226048384","https://openalex.org/W4246078117","https://openalex.org/W4250589301","https://openalex.org/W4292313830","https://openalex.org/W4293255219","https://openalex.org/W4293370597","https://openalex.org/W4307876719","https://openalex.org/W4312433875","https://openalex.org/W4312744983","https://openalex.org/W4315471900","https://openalex.org/W4317553716","https://openalex.org/W4327571609","https://openalex.org/W4392514157","https://openalex.org/W4400810536","https://openalex.org/W4402402057","https://openalex.org/W6600390444","https://openalex.org/W6600644339","https://openalex.org/W6604963999","https://openalex.org/W6617021176","https://openalex.org/W6630965284","https://openalex.org/W6633472260","https://openalex.org/W6637429886","https://openalex.org/W6638018090","https://openalex.org/W6639732818","https://openalex.org/W6653435097","https://openalex.org/W6677611147","https://openalex.org/W6677959772","https://openalex.org/W6680657880","https://openalex.org/W6682367392","https://openalex.org/W6684037837","https://openalex.org/W6684338915","https://openalex.org/W6684921986","https://openalex.org/W6687063787","https://openalex.org/W6696708278","https://openalex.org/W6714580625","https://openalex.org/W6717915398","https://openalex.org/W6718836005","https://openalex.org/W6735011893","https://openalex.org/W6736209634","https://openalex.org/W6737893269","https://openalex.org/W6738483526","https://openalex.org/W6741002519","https://openalex.org/W6743945974","https://openalex.org/W6746636705","https://openalex.org/W6746721349","https://openalex.org/W6747473740","https://openalex.org/W6747790125","https://openalex.org/W6748512255","https://openalex.org/W6750514247","https://openalex.org/W6751535212","https://openalex.org/W6751725685","https://openalex.org/W6754779100","https://openalex.org/W6756274953","https://openalex.org/W6757525626","https://openalex.org/W6758924645","https://openalex.org/W6760655553","https://openalex.org/W6760822370","https://openalex.org/W6764346552","https://openalex.org/W6764366976","https://openalex.org/W6764637989","https://openalex.org/W6765804866","https://openalex.org/W6766121242","https://openalex.org/W6767486501","https://openalex.org/W6769121485","https://openalex.org/W6770009701","https://openalex.org/W6770744290","https://openalex.org/W6771280675","https://openalex.org/W6772215039","https://openalex.org/W6772925031","https://openalex.org/W6773490554","https://openalex.org/W6774397352","https://openalex.org/W6774406872","https://openalex.org/W6774521036","https://openalex.org/W6775095043","https://openalex.org/W6779451528","https://openalex.org/W6779812412","https://openalex.org/W6780559895","https://openalex.org/W6781365370","https://openalex.org/W6782766965","https://openalex.org/W6783084392","https://openalex.org/W6783117007","https://openalex.org/W6784411328","https://openalex.org/W6784643869","https://openalex.org/W6785022064","https://openalex.org/W6785187516","https://openalex.org/W6785471904","https://openalex.org/W6785819423","https://openalex.org/W6785876034","https://openalex.org/W6788927584","https://openalex.org/W6789855942","https://openalex.org/W6790435433","https://openalex.org/W6791654189","https://openalex.org/W6795908048","https://openalex.org/W6796690604","https://openalex.org/W6796857944","https://openalex.org/W6796910599","https://openalex.org/W6796988493","https://openalex.org/W6798335325","https://openalex.org/W6799402547","https://openalex.org/W6800004206","https://openalex.org/W6801783160","https://openalex.org/W6801971982","https://openalex.org/W6803800456","https://openalex.org/W6810181232","https://openalex.org/W6810353404","https://openalex.org/W6838720904","https://openalex.org/W6838881121","https://openalex.org/W6841279712","https://openalex.org/W6843754643","https://openalex.org/W6847069214","https://openalex.org/W6849708968","https://openalex.org/W6852653235","https://openalex.org/W6854105899","https://openalex.org/W6856361880","https://openalex.org/W6862793959","https://openalex.org/W6869630987","https://openalex.org/W7029993674"],"related_works":["https://openalex.org/W4306904969","https://openalex.org/W2138720691","https://openalex.org/W4362501864","https://openalex.org/W4380318855","https://openalex.org/W2031695474","https://openalex.org/W2877093712","https://openalex.org/W2116157560","https://openalex.org/W4310614650","https://openalex.org/W4386738330","https://openalex.org/W3205598877"],"abstract_inverted_index":{"Reinforcement":[0],"Learning":[1],"(RL)":[2],"has":[3,43],"achieved":[4],"tremendous":[5],"success":[6],"in":[7,20,34,55,69,110,166],"many":[8],"complex":[9],"decision-making":[10],"tasks.":[11],"However,":[12],"safety":[13],"concerns":[14],"are":[15],"raised":[16],"during":[17],"deploying":[18],"RL":[19,30,51,67,78,94,107,138,155,183,193],"real-world":[21,111],"applications,":[22,112],"leading":[23],"to":[24,148,170],"a":[25,44,61,74],"growing":[26],"demand":[27],"for":[28,64,105],"safe":[29,41,50,66,77,93,106,137,154,167,182,192],"algorithms,":[31,184],"such":[32],"as":[33,114],"autonomous":[35],"driving":[36],"and":[37,85,98,121,142,151],"robotics":[38],"scenarios.":[39],"While":[40],"control":[42],"long":[45],"history,":[46],"the":[47,56,80,90,119,125,129,133,149,160,163,179,196],"study":[48,180],"of":[49,76,82,92,127,136,153,162,181],"algorithms":[52,139,194],"is":[53,140],"still":[54],"early":[57],"stages.":[58],"To":[59,177],"establish":[60],"good":[62],"foundation":[63],"future":[65,172],"research,":[68],"this":[70,175],"paper,":[71],"we":[72,88,117,158,185],"provide":[73],"review":[75,89],"from":[79,95,124],"perspectives":[81,126],"methods,":[83],"theories,":[84],"applications.":[86],"First,":[87],"progress":[91,123],"five":[96,102],"dimensions":[97],"come":[99],"up":[100],"with":[101],"crucial":[103],"problems":[104,165],"being":[108],"deployed":[109],"coined":[113],"\"2H3W\".":[115],"Second,":[116],"analyze":[118],"algorithm":[120],"theory":[122],"answering":[128],"\"2H3W\"":[130],"problems.":[131],"Particularly,":[132],"sample":[134],"complexity":[135],"reviewed":[141],"discussed,":[143],"followed":[144],"by":[145],"an":[146,187],"introduction":[147],"applications":[150],"benchmarks":[152],"algorithms.":[156],"Finally,":[157],"open":[159],"discussion":[161],"challenging":[164],"RL,":[168],"hoping":[169],"inspire":[171],"research":[173],"on":[174],"thread.":[176],"advance":[178],"release":[186],"open-sourced":[188],"repository":[189],"containing":[190],"major":[191],"at":[195],"link.":[197]},"counts_by_year":[{"year":2026,"cited_by_count":32},{"year":2025,"cited_by_count":119},{"year":2024,"cited_by_count":19},{"year":2023,"cited_by_count":3}],"updated_date":"2026-04-14T08:04:32.555800","created_date":"2025-10-10T00:00:00"}
