{"id":"https://openalex.org/W4391021228","doi":"https://doi.org/10.1109/cdc49753.2023.10383742","title":"Stable and Safe Reinforcement Learning via a Barrier-Lyapunov Actor-Critic Approach","display_name":"Stable and Safe Reinforcement Learning via a Barrier-Lyapunov Actor-Critic Approach","publication_year":2023,"publication_date":"2023-12-13","ids":{"openalex":"https://openalex.org/W4391021228","doi":"https://doi.org/10.1109/cdc49753.2023.10383742"},"language":"en","primary_location":{"id":"doi:10.1109/cdc49753.2023.10383742","is_oa":false,"landing_page_url":"https://doi.org/10.1109/cdc49753.2023.10383742","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2023 62nd IEEE Conference on Decision and Control (CDC)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5100551410","display_name":"Liqun Zhao","orcid":"https://orcid.org/0000-0002-2386-5299"},"institutions":[{"id":"https://openalex.org/I40120149","display_name":"University of Oxford","ror":"https://ror.org/052gg0110","country_code":"GB","type":"education","lineage":["https://openalex.org/I40120149"]},{"id":"https://openalex.org/I4210146410","display_name":"Science Oxford","ror":"https://ror.org/04j8yhy50","country_code":"GB","type":"nonprofit","lineage":["https://openalex.org/I4210146410"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Liqun Zhao","raw_affiliation_strings":["University of Oxford,Department of Engineering Science,Oxford,United Kingdom","Department of Engineering Science, University of Oxford, Oxford, United Kingdom"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"University of Oxford,Department of Engineering Science,Oxford,United Kingdom","institution_ids":["https://openalex.org/I4210146410","https://openalex.org/I40120149"]},{"raw_affiliation_string":"Department of Engineering Science, University of Oxford, Oxford, United Kingdom","institution_ids":["https://openalex.org/I4210146410","https://openalex.org/I40120149"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5025590360","display_name":"Konstantinos Gatsis","orcid":"https://orcid.org/0000-0002-0734-5445"},"institutions":[{"id":"https://openalex.org/I40120149","display_name":"University of Oxford","ror":"https://ror.org/052gg0110","country_code":"GB","type":"education","lineage":["https://openalex.org/I40120149"]},{"id":"https://openalex.org/I4210146410","display_name":"Science Oxford","ror":"https://ror.org/04j8yhy50","country_code":"GB","type":"nonprofit","lineage":["https://openalex.org/I4210146410"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Konstantinos Gatsis","raw_affiliation_strings":["University of Oxford,Department of Engineering Science,Oxford,United Kingdom","Department of Engineering Science, University of Oxford, Oxford, United Kingdom"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"University of Oxford,Department of Engineering Science,Oxford,United Kingdom","institution_ids":["https://openalex.org/I4210146410","https://openalex.org/I40120149"]},{"raw_affiliation_string":"Department of Engineering Science, University of Oxford, Oxford, United Kingdom","institution_ids":["https://openalex.org/I4210146410","https://openalex.org/I40120149"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5053811056","display_name":"Antonis Papachristodoulou","orcid":"https://orcid.org/0000-0002-3565-8967"},"institutions":[{"id":"https://openalex.org/I40120149","display_name":"University of Oxford","ror":"https://ror.org/052gg0110","country_code":"GB","type":"education","lineage":["https://openalex.org/I40120149"]},{"id":"https://openalex.org/I4210146410","display_name":"Science Oxford","ror":"https://ror.org/04j8yhy50","country_code":"GB","type":"nonprofit","lineage":["https://openalex.org/I4210146410"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Antonis Papachristodoulou","raw_affiliation_strings":["University of Oxford,Department of Engineering Science,Oxford,United Kingdom","Department of Engineering Science, University of Oxford, Oxford, United Kingdom"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"University of Oxford,Department of Engineering Science,Oxford,United Kingdom","institution_ids":["https://openalex.org/I4210146410","https://openalex.org/I40120149"]},{"raw_affiliation_string":"Department of Engineering Science, University of Oxford, Oxford, United Kingdom","institution_ids":["https://openalex.org/I4210146410","https://openalex.org/I40120149"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":2.1211,"has_fulltext":false,"cited_by_count":13,"citation_normalized_percentile":{"value":0.90078371,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":90,"max":99},"biblio":{"volume":null,"issue":null,"first_page":"1320","last_page":"1325"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9980999827384949,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9980999827384949,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10917","display_name":"Smart Grid Security and Resilience","score":0.9939000010490417,"subfield":{"id":"https://openalex.org/subfields/2207","display_name":"Control and Systems Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12810","display_name":"Real-time simulation and control systems","score":0.9667999744415283,"subfield":{"id":"https://openalex.org/subfields/2207","display_name":"Control and Systems Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.7299923896789551},{"id":"https://openalex.org/keywords/lyapunov-function","display_name":"Lyapunov function","score":0.7109517455101013},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6541233062744141},{"id":"https://openalex.org/keywords/controller","display_name":"Controller (irrigation)","score":0.6524726152420044},{"id":"https://openalex.org/keywords/stability","display_name":"Stability (learning theory)","score":0.5474600791931152},{"id":"https://openalex.org/keywords/robotics","display_name":"Robotics","score":0.4892697036266327},{"id":"https://openalex.org/keywords/function","display_name":"Function (biology)","score":0.42565932869911194},{"id":"https://openalex.org/keywords/control-theory","display_name":"Control theory (sociology)","score":0.36247387528419495},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.35338878631591797},{"id":"https://openalex.org/keywords/control","display_name":"Control (management)","score":0.31711679697036743},{"id":"https://openalex.org/keywords/robot","display_name":"Robot","score":0.2262260615825653},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.18927806615829468}],"concepts":[{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.7299923896789551},{"id":"https://openalex.org/C60640748","wikidata":"https://www.wikidata.org/wiki/Q2337858","display_name":"Lyapunov function","level":3,"score":0.7109517455101013},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6541233062744141},{"id":"https://openalex.org/C203479927","wikidata":"https://www.wikidata.org/wiki/Q5165939","display_name":"Controller (irrigation)","level":2,"score":0.6524726152420044},{"id":"https://openalex.org/C112972136","wikidata":"https://www.wikidata.org/wiki/Q7595718","display_name":"Stability (learning theory)","level":2,"score":0.5474600791931152},{"id":"https://openalex.org/C34413123","wikidata":"https://www.wikidata.org/wiki/Q170978","display_name":"Robotics","level":3,"score":0.4892697036266327},{"id":"https://openalex.org/C14036430","wikidata":"https://www.wikidata.org/wiki/Q3736076","display_name":"Function (biology)","level":2,"score":0.42565932869911194},{"id":"https://openalex.org/C47446073","wikidata":"https://www.wikidata.org/wiki/Q5165890","display_name":"Control theory (sociology)","level":3,"score":0.36247387528419495},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.35338878631591797},{"id":"https://openalex.org/C2775924081","wikidata":"https://www.wikidata.org/wiki/Q55608371","display_name":"Control (management)","level":2,"score":0.31711679697036743},{"id":"https://openalex.org/C90509273","wikidata":"https://www.wikidata.org/wiki/Q11012","display_name":"Robot","level":2,"score":0.2262260615825653},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.18927806615829468},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C6557445","wikidata":"https://www.wikidata.org/wiki/Q173113","display_name":"Agronomy","level":1,"score":0.0},{"id":"https://openalex.org/C78458016","wikidata":"https://www.wikidata.org/wiki/Q840400","display_name":"Evolutionary biology","level":1,"score":0.0},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0},{"id":"https://openalex.org/C158622935","wikidata":"https://www.wikidata.org/wiki/Q660848","display_name":"Nonlinear system","level":2,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1109/cdc49753.2023.10383742","is_oa":false,"landing_page_url":"https://doi.org/10.1109/cdc49753.2023.10383742","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2023 62nd IEEE Conference on Decision and Control (CDC)","raw_type":"proceedings-article"},{"id":"pmh:oai:eprints.soton.ac.uk:494564","is_oa":false,"landing_page_url":"http://doi.org/10.1109/CDC49753.2023.10383742>).","pdf_url":null,"source":{"id":"https://openalex.org/S4306401019","display_name":"ePrints Soton (University of Southampton)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I43439940","host_organization_name":"University of Southampton","host_organization_lineage":["https://openalex.org/I43439940"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"acceptedVersion","is_accepted":true,"is_published":false,"raw_source_name":"","raw_type":"Conference or Workshop Item"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":17,"referenced_works":["https://openalex.org/W2904246096","https://openalex.org/W2966735560","https://openalex.org/W3045059767","https://openalex.org/W3186374574","https://openalex.org/W3210638057","https://openalex.org/W4210527295","https://openalex.org/W4312433875","https://openalex.org/W4361807175","https://openalex.org/W4388065116","https://openalex.org/W4388903135","https://openalex.org/W4390939851","https://openalex.org/W4391020052","https://openalex.org/W4391021228","https://openalex.org/W6737893269","https://openalex.org/W6801468180","https://openalex.org/W6804655846","https://openalex.org/W6839881093"],"related_works":["https://openalex.org/W4306904969","https://openalex.org/W2138720691","https://openalex.org/W4362501864","https://openalex.org/W4380318855","https://openalex.org/W2031695474","https://openalex.org/W2024136090","https://openalex.org/W2586732548","https://openalex.org/W3049728571","https://openalex.org/W2964765435","https://openalex.org/W1508899372"],"abstract_inverted_index":{"Reinforcement":[0],"Learning":[1],"(RL)":[2],"has":[3],"demonstrated":[4],"impressive":[5],"performance":[6],"in":[7,74,139,171],"various":[8],"areas":[9],"such":[10],"as":[11],"video":[12],"games":[13],"and":[14,19,50,56,64,89,101,117,151,192,226],"robotics.":[15],"However,":[16],"ensuring":[17],"safety":[18,49,88,100,150,197],"stability,":[20],"which":[21,83,222],"are":[22,106],"two":[23],"critical":[24],"properties":[25],"from":[26,113],"a":[27,31,78,181,210],"control":[28,38,147],"perspective,":[29],"remains":[30],"significant":[32],"challenge":[33],"when":[34,149],"using":[35],"RL":[36,54,75],"to":[37,76,124,200,219],"real-world":[39],"systems.":[40],"In":[41,94],"this":[42,95,178,215],"paper,":[43,216],"we":[44],"first":[45],"provide":[46,145],"definitions":[47],"of":[48,128,196,214],"stability":[51,90,105,152],"for":[52,91,99,104],"the":[53,59,71,86,92,110,114,118,126,129,141,172,186,189],"system,":[55],"then":[57],"combine":[58],"Control":[60,65],"Barrier":[61],"Function":[62,67],"(CBF)":[63],"Lyapunov":[66],"(CLF)":[68],"methods":[69],"with":[70],"actor-critic":[72],"method":[73,121],"propose":[77],"Barrier-Lyapunov":[79],"Actor-Critic":[80],"(BLAC)":[81],"framework":[82,179],"helps":[84],"maintain":[85],"aforementioned":[87],"system.":[93],"framework,":[96],"CBF":[97],"constraints":[98,153,198],"CLF":[102],"constraint":[103],"constructed":[107],"based":[108],"on":[109],"data":[111],"sampled":[112],"replay":[115],"buffer,":[116],"augmented":[119],"Lagrangian":[120],"is":[122,137],"used":[123],"update":[125],"parameters":[127],"RL-based":[130,142],"controller.":[131],"Furthermore,":[132],"an":[133,227],"additional":[134,224],"backup":[135],"controller":[136,143,182],"introduced":[138],"case":[140],"cannot":[144,154],"valid":[146],"signals":[148],"be":[155,169],"satisfied":[156],"simultaneously.":[157],"Simulation":[158],"results":[159],"<sup":[160,163,203,206],"xmlns:mml=\"http://www.w3.org/1998/Math/MathML\"":[161,164,204,207],"xmlns:xlink=\"http://www.w3.org/1999/xlink\">1</sup>":[162,165],"The":[166],"code":[167],"can":[168,184],"found":[170],"GitHub":[173],"repository:":[174],"https://github.com/LiqunZhao/A-Barrier-Lyapunov-Actor-Critic-Reinforcement-Learning-Approach-for-Safe-and-Stable-Control":[175],"show":[176],"that":[177,183],"yields":[180],"help":[185],"system":[187],"approach":[188],"desired":[190],"state":[191],"cause":[193],"fewer":[194],"violations":[195],"compared":[199],"baseline":[201],"algorithms":[202],"xmlns:xlink=\"http://www.w3.org/1999/xlink\">2</sup>":[205,208],"For":[209],"more":[211],"comprehensive":[212],"version":[213],"please":[217],"refer":[218],"[1]":[220],"(https://arxiv.org/abs/2304.04066)":[221],"includes":[223],"details":[225],"extra":[228],"\u201cSimulated":[229],"Cars\u201d":[230],"task..":[231]},"counts_by_year":[{"year":2026,"cited_by_count":2},{"year":2025,"cited_by_count":4},{"year":2024,"cited_by_count":6},{"year":2023,"cited_by_count":1}],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2025-10-10T00:00:00"}
