{"id":"https://openalex.org/W4390939851","doi":"https://doi.org/10.1109/tnnls.2023.3348422","title":"Learn Zero-Constraint-Violation Safe Policy in Model-Free Constrained Reinforcement Learning","display_name":"Learn Zero-Constraint-Violation Safe Policy in Model-Free Constrained Reinforcement Learning","publication_year":2024,"publication_date":"2024-01-17","ids":{"openalex":"https://openalex.org/W4390939851","doi":"https://doi.org/10.1109/tnnls.2023.3348422","pmid":"https://pubmed.ncbi.nlm.nih.gov/38231811"},"language":"en","primary_location":{"id":"doi:10.1109/tnnls.2023.3348422","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tnnls.2023.3348422","pdf_url":null,"source":{"id":"https://openalex.org/S4210175523","display_name":"IEEE Transactions on Neural Networks and Learning Systems","issn_l":"2162-237X","issn":["2162-237X","2162-2388"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Neural Networks and Learning Systems","raw_type":"journal-article"},"type":"article","indexed_in":["crossref","pubmed"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5004124678","display_name":"Haitong Ma","orcid":"https://orcid.org/0000-0002-9943-0638"},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Haitong Ma","raw_affiliation_strings":["State Key Laboratory of Automotive Safety and Energy, School of Vehicle and Mobility, and the Center for Intelligent Connected Vehicles and Transportation, Tsinghua University, Beijing, China"],"raw_orcid":"https://orcid.org/0000-0002-9943-0638","affiliations":[{"raw_affiliation_string":"State Key Laboratory of Automotive Safety and Energy, School of Vehicle and Mobility, and the Center for Intelligent Connected Vehicles and Transportation, Tsinghua University, Beijing, China","institution_ids":["https://openalex.org/I99065089"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5040156274","display_name":"Changliu Liu","orcid":"https://orcid.org/0000-0002-3767-5517"},"institutions":[{"id":"https://openalex.org/I74973139","display_name":"Carnegie Mellon University","ror":"https://ror.org/05x2bcf33","country_code":"US","type":"education","lineage":["https://openalex.org/I74973139"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Changliu Liu","raw_affiliation_strings":["Robotics Institute, Carnegie Mellon University, Pittsburgh, PA, USA"],"raw_orcid":"https://orcid.org/0000-0002-3767-5517","affiliations":[{"raw_affiliation_string":"Robotics Institute, Carnegie Mellon University, Pittsburgh, PA, USA","institution_ids":["https://openalex.org/I74973139"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100747108","display_name":"Shengbo Eben Li","orcid":"https://orcid.org/0000-0003-4923-3633"},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Shengbo Eben Li","raw_affiliation_strings":["State Key Laboratory of Automotive Safety and Energy, School of Vehicle and Mobility, and the Center for Intelligent Connected Vehicles and Transportation, Tsinghua University, Beijing, China"],"raw_orcid":"https://orcid.org/0000-0003-4923-3633","affiliations":[{"raw_affiliation_string":"State Key Laboratory of Automotive Safety and Energy, School of Vehicle and Mobility, and the Center for Intelligent Connected Vehicles and Transportation, Tsinghua University, Beijing, China","institution_ids":["https://openalex.org/I99065089"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5036282981","display_name":"Sifa Zheng","orcid":"https://orcid.org/0000-0001-5160-1365"},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Sifa Zheng","raw_affiliation_strings":["State Key Laboratory of Automotive Safety and Energy, School of Vehicle and Mobility, and the Center for Intelligent Connected Vehicles and Transportation, Tsinghua University, Beijing, China"],"raw_orcid":"https://orcid.org/0000-0001-5160-1365","affiliations":[{"raw_affiliation_string":"State Key Laboratory of Automotive Safety and Energy, School of Vehicle and Mobility, and the Center for Intelligent Connected Vehicles and Transportation, Tsinghua University, Beijing, China","institution_ids":["https://openalex.org/I99065089"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101746862","display_name":"Wenchao Sun","orcid":"https://orcid.org/0000-0003-2969-4096"},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Wenchao Sun","raw_affiliation_strings":["State Key Laboratory of Automotive Safety and Energy, School of Vehicle and Mobility, and the Center for Intelligent Connected Vehicles and Transportation, Tsinghua University, Beijing, China"],"raw_orcid":"https://orcid.org/0000-0003-2969-4096","affiliations":[{"raw_affiliation_string":"State Key Laboratory of Automotive Safety and Energy, School of Vehicle and Mobility, and the Center for Intelligent Connected Vehicles and Transportation, Tsinghua University, Beijing, China","institution_ids":["https://openalex.org/I99065089"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5072222039","display_name":"Jianyu Chen","orcid":"https://orcid.org/0000-0003-0282-8621"},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jianyu Chen","raw_affiliation_strings":["Institute of Interdisciplinary Information Science, Tsinghua University, Beijing, China","Shanghai Qizhi Institute, Shanghai, China"],"raw_orcid":"https://orcid.org/0000-0003-0282-8621","affiliations":[{"raw_affiliation_string":"Institute of Interdisciplinary Information Science, Tsinghua University, Beijing, China","institution_ids":["https://openalex.org/I99065089"]},{"raw_affiliation_string":"Shanghai Qizhi Institute, Shanghai, China","institution_ids":[]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5004124678"],"corresponding_institution_ids":["https://openalex.org/I99065089"],"apc_list":null,"apc_paid":null,"fwci":4.3438,"has_fulltext":false,"cited_by_count":16,"citation_normalized_percentile":{"value":0.94573038,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":94,"max":99},"biblio":{"volume":"36","issue":"2","first_page":"2327","last_page":"2341"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12810","display_name":"Real-time simulation and control systems","score":0.953499972820282,"subfield":{"id":"https://openalex.org/subfields/2207","display_name":"Control and Systems Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T12810","display_name":"Real-time simulation and control systems","score":0.953499972820282,"subfield":{"id":"https://openalex.org/subfields/2207","display_name":"Control and Systems Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9092000126838684,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.8599408268928528},{"id":"https://openalex.org/keywords/leverage","display_name":"Leverage (statistics)","score":0.7364542484283447},{"id":"https://openalex.org/keywords/constraint","display_name":"Constraint (computer-aided design)","score":0.6780380010604858},{"id":"https://openalex.org/keywords/mathematical-optimization","display_name":"Mathematical optimization","score":0.5969662666320801},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.5243004560470581},{"id":"https://openalex.org/keywords/set","display_name":"Set (abstract data type)","score":0.5148345828056335},{"id":"https://openalex.org/keywords/zero","display_name":"Zero (linguistics)","score":0.47816213965415955},{"id":"https://openalex.org/keywords/function","display_name":"Function (biology)","score":0.4668135643005371},{"id":"https://openalex.org/keywords/convergence","display_name":"Convergence (economics)","score":0.4144382178783417},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.25458335876464844},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.22454378008842468},{"id":"https://openalex.org/keywords/economics","display_name":"Economics","score":0.08874955773353577}],"concepts":[{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.8599408268928528},{"id":"https://openalex.org/C153083717","wikidata":"https://www.wikidata.org/wiki/Q6535263","display_name":"Leverage (statistics)","level":2,"score":0.7364542484283447},{"id":"https://openalex.org/C2776036281","wikidata":"https://www.wikidata.org/wiki/Q48769818","display_name":"Constraint (computer-aided design)","level":2,"score":0.6780380010604858},{"id":"https://openalex.org/C126255220","wikidata":"https://www.wikidata.org/wiki/Q141495","display_name":"Mathematical optimization","level":1,"score":0.5969662666320801},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.5243004560470581},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.5148345828056335},{"id":"https://openalex.org/C2780813799","wikidata":"https://www.wikidata.org/wiki/Q3274237","display_name":"Zero (linguistics)","level":2,"score":0.47816213965415955},{"id":"https://openalex.org/C14036430","wikidata":"https://www.wikidata.org/wiki/Q3736076","display_name":"Function (biology)","level":2,"score":0.4668135643005371},{"id":"https://openalex.org/C2777303404","wikidata":"https://www.wikidata.org/wiki/Q759757","display_name":"Convergence (economics)","level":2,"score":0.4144382178783417},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.25458335876464844},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.22454378008842468},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.08874955773353577},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C50522688","wikidata":"https://www.wikidata.org/wiki/Q189833","display_name":"Economic growth","level":1,"score":0.0},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0},{"id":"https://openalex.org/C78458016","wikidata":"https://www.wikidata.org/wiki/Q840400","display_name":"Evolutionary biology","level":1,"score":0.0},{"id":"https://openalex.org/C2524010","wikidata":"https://www.wikidata.org/wiki/Q8087","display_name":"Geometry","level":1,"score":0.0},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.0},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1109/tnnls.2023.3348422","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tnnls.2023.3348422","pdf_url":null,"source":{"id":"https://openalex.org/S4210175523","display_name":"IEEE Transactions on Neural Networks and Learning Systems","issn_l":"2162-237X","issn":["2162-237X","2162-2388"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Neural Networks and Learning Systems","raw_type":"journal-article"},{"id":"pmid:38231811","is_oa":false,"landing_page_url":"https://pubmed.ncbi.nlm.nih.gov/38231811","pdf_url":null,"source":{"id":"https://openalex.org/S4306525036","display_name":"PubMed","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1299303238","host_organization_name":"National Institutes of Health","host_organization_lineage":["https://openalex.org/I1299303238"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE transactions on neural networks and learning systems","raw_type":null}],"best_oa_location":null,"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/7","display_name":"Affordable and clean energy","score":0.5299999713897705}],"awards":[{"id":"https://openalex.org/G1095835165","display_name":null,"funder_award_id":"52221005","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G3069687021","display_name":null,"funder_award_id":"2020YFB1600202","funder_id":"https://openalex.org/F4320335777","funder_display_name":"National Key Research and Development Program of China"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"},{"id":"https://openalex.org/F4320335777","display_name":"National Key Research and Development Program of China","ror":null}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":66,"referenced_works":["https://openalex.org/W2000359213","https://openalex.org/W2073314543","https://openalex.org/W2094387729","https://openalex.org/W2101075098","https://openalex.org/W2141203641","https://openalex.org/W2155511972","https://openalex.org/W2158782408","https://openalex.org/W2325770295","https://openalex.org/W2563378438","https://openalex.org/W2586823359","https://openalex.org/W2735010720","https://openalex.org/W2793066675","https://openalex.org/W2904246096","https://openalex.org/W2963293747","https://openalex.org/W2963525569","https://openalex.org/W2966735560","https://openalex.org/W2968945909","https://openalex.org/W2982316857","https://openalex.org/W2996568167","https://openalex.org/W3006344226","https://openalex.org/W3011964880","https://openalex.org/W3093201391","https://openalex.org/W3095883371","https://openalex.org/W3117126067","https://openalex.org/W3128395826","https://openalex.org/W3135239772","https://openalex.org/W3176452384","https://openalex.org/W3182751084","https://openalex.org/W3187550742","https://openalex.org/W3196755144","https://openalex.org/W3208941764","https://openalex.org/W3209549283","https://openalex.org/W4206497039","https://openalex.org/W4210727699","https://openalex.org/W4221117030","https://openalex.org/W4250739957","https://openalex.org/W4285600796","https://openalex.org/W4307230233","https://openalex.org/W6637967152","https://openalex.org/W6687063787","https://openalex.org/W6737893269","https://openalex.org/W6738483526","https://openalex.org/W6747473740","https://openalex.org/W6747790125","https://openalex.org/W6748839928","https://openalex.org/W6751725685","https://openalex.org/W6760425463","https://openalex.org/W6768617876","https://openalex.org/W6771280675","https://openalex.org/W6774406872","https://openalex.org/W6780032648","https://openalex.org/W6780559895","https://openalex.org/W6780587392","https://openalex.org/W6785187516","https://openalex.org/W6785250840","https://openalex.org/W6785471904","https://openalex.org/W6788898170","https://openalex.org/W6790559938","https://openalex.org/W6795589745","https://openalex.org/W6795857475","https://openalex.org/W6800776581","https://openalex.org/W6803126811","https://openalex.org/W6811221485","https://openalex.org/W6839710182","https://openalex.org/W6922480057","https://openalex.org/W7018863775"],"related_works":["https://openalex.org/W4362501864","https://openalex.org/W4306904969","https://openalex.org/W4380318855","https://openalex.org/W2138720691","https://openalex.org/W2031695474","https://openalex.org/W2586732548","https://openalex.org/W3049728571","https://openalex.org/W20361778","https://openalex.org/W2024136090","https://openalex.org/W2768698792"],"abstract_inverted_index":{"We":[0,142],"focus":[1],"on":[2,88,163],"learning":[3,11],"the":[4,19,31,36,55,66,85,89,96,115,120,125,139,152,164,183],"zero-constraint-violation":[5,61],"safe":[6,44,62,67,86],"policy":[7,45],"in":[8,186],"model-free":[9,14],"reinforcement":[10],"(RL).":[12],"Existing":[13],"RL":[15,135,148],"studies":[16],"mostly":[17],"use":[18,114],"posterior":[20],"penalty":[21],"to":[22,33,59,77,100,118,137,151],"penalize":[23],"dangerous":[24,82,97],"actions,":[25,83],"which":[26,123],"means":[27],"they":[28,39],"must":[29],"experience":[30],"danger":[32],"learn":[34,41,60,119],"from":[35],"danger.":[37],"Therefore,":[38,92],"cannot":[40],"a":[42,133,169,174],"zero-violation":[43,140,155],"even":[46],"after":[47],"convergence.":[48],"To":[49],"handle":[50],"this":[51],"problem,":[52],"we":[53,93,113,131],"leverage":[54],"safety-oriented":[56],"energy":[57,73,121],"functions":[58],"policies":[63,156,185],"and":[64,103,168,192],"propose":[65],"set":[68,87],"actor-critic":[69],"(SSAC)":[70],"algorithm.":[71],"The":[72,158],"function":[74],"is":[75,161],"designed":[76],"increase":[78],"rapidly":[79],"for":[80],"potentially":[81],"locating":[84],"action":[90],"space.":[91],"can":[94],"identify":[95],"actions":[98],"prior":[99],"taking":[101],"them":[102],"achieve":[104,189],"zero-constraint":[105,190],"violation.":[106],"Our":[107],"major":[108],"contributions":[109],"are":[110],"twofold.":[111],"First,":[112],"data-driven":[116],"methods":[117],"function,":[122],"releases":[124],"requirement":[126],"of":[127],"known":[128],"dynamics.":[129],"Second,":[130],"formulate":[132],"constrained":[134,147,153],"problem":[136],"solve":[138],"policies.":[141],"prove":[143],"that":[144,182],"our":[145],"Lagrangian-based":[146],"solutions":[149],"converge":[150],"optimal":[154],"theoretically.":[157],"proposed":[159],"algorithm":[160],"evaluated":[162],"complex":[165],"simulation":[166],"environments":[167,188],"hardware-in-loop":[170],"(HIL)":[171],"experiment":[172],"with":[173,195],"real":[175],"autonomous":[176],"vehicle":[177],"controller.":[178],"Experimental":[179],"results":[180],"suggest":[181],"converged":[184],"all":[187],"violation":[191],"comparable":[193],"performance":[194],"model-based":[196],"baseline.":[197]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":9},{"year":2024,"cited_by_count":4},{"year":2023,"cited_by_count":2}],"updated_date":"2026-05-05T08:41:31.759640","created_date":"2025-10-10T00:00:00"}
