{"id":"https://openalex.org/W3207935216","doi":"https://doi.org/10.1109/icra48506.2021.9561253","title":"Model-based Reinforcement Learning with Provable Safety Guarantees via Control Barrier Functions","display_name":"Model-based Reinforcement Learning with Provable Safety Guarantees via Control Barrier Functions","publication_year":2021,"publication_date":"2021-05-30","ids":{"openalex":"https://openalex.org/W3207935216","doi":"https://doi.org/10.1109/icra48506.2021.9561253","mag":"3207935216"},"language":"en","primary_location":{"id":"doi:10.1109/icra48506.2021.9561253","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icra48506.2021.9561253","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2021 IEEE International Conference on Robotics and Automation (ICRA)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5072377630","display_name":"Hongchao Zhang","orcid":"https://orcid.org/0000-0003-2716-3267"},"institutions":[{"id":"https://openalex.org/I107077323","display_name":"Worcester Polytechnic Institute","ror":"https://ror.org/05ejpqr48","country_code":"US","type":"education","lineage":["https://openalex.org/I107077323"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Hongchao Zhang","raw_affiliation_strings":["Department of Electrical and Computer Engineering, Worcester Polytechnic Inistitute, Worcester, MA, USA"],"affiliations":[{"raw_affiliation_string":"Department of Electrical and Computer Engineering, Worcester Polytechnic Inistitute, Worcester, MA, USA","institution_ids":["https://openalex.org/I107077323"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5013564936","display_name":"Zhouchi Li","orcid":"https://orcid.org/0000-0003-2040-0397"},"institutions":[{"id":"https://openalex.org/I107077323","display_name":"Worcester Polytechnic Institute","ror":"https://ror.org/05ejpqr48","country_code":"US","type":"education","lineage":["https://openalex.org/I107077323"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Zhouchi Li","raw_affiliation_strings":["Department of Electrical and Computer Engineering, Worcester Polytechnic Inistitute, Worcester, MA, USA"],"affiliations":[{"raw_affiliation_string":"Department of Electrical and Computer Engineering, Worcester Polytechnic Inistitute, Worcester, MA, USA","institution_ids":["https://openalex.org/I107077323"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5004774385","display_name":"Andrew Clark","orcid":"https://orcid.org/0000-0002-5868-6186"},"institutions":[{"id":"https://openalex.org/I107077323","display_name":"Worcester Polytechnic Institute","ror":"https://ror.org/05ejpqr48","country_code":"US","type":"education","lineage":["https://openalex.org/I107077323"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Andrew Clark","raw_affiliation_strings":["Department of Electrical and Computer Engineering, Worcester Polytechnic Inistitute, Worcester, MA, USA"],"affiliations":[{"raw_affiliation_string":"Department of Electrical and Computer Engineering, Worcester Polytechnic Inistitute, Worcester, MA, USA","institution_ids":["https://openalex.org/I107077323"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5072377630"],"corresponding_institution_ids":["https://openalex.org/I107077323"],"apc_list":null,"apc_paid":null,"fwci":0.4079,"has_fulltext":false,"cited_by_count":9,"citation_normalized_percentile":{"value":0.69349513,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":99},"biblio":{"volume":null,"issue":null,"first_page":"792","last_page":"798"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9980000257492065,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9980000257492065,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10791","display_name":"Advanced Control Systems Optimization","score":0.9900000095367432,"subfield":{"id":"https://openalex.org/subfields/2207","display_name":"Control and Systems Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10142","display_name":"Formal Methods in Verification","score":0.9702000021934509,"subfield":{"id":"https://openalex.org/subfields/1703","display_name":"Computational Theory and Mathematics"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.8332157135009766},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7190708518028259},{"id":"https://openalex.org/keywords/inverted-pendulum","display_name":"Inverted pendulum","score":0.6754165887832642},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.4795075058937073},{"id":"https://openalex.org/keywords/control","display_name":"Control (management)","score":0.477802574634552},{"id":"https://openalex.org/keywords/class","display_name":"Class (philosophy)","score":0.4691570997238159},{"id":"https://openalex.org/keywords/property","display_name":"Property (philosophy)","score":0.41659659147262573}],"concepts":[{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.8332157135009766},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7190708518028259},{"id":"https://openalex.org/C192921069","wikidata":"https://www.wikidata.org/wiki/Q550134","display_name":"Inverted pendulum","level":3,"score":0.6754165887832642},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4795075058937073},{"id":"https://openalex.org/C2775924081","wikidata":"https://www.wikidata.org/wiki/Q55608371","display_name":"Control (management)","level":2,"score":0.477802574634552},{"id":"https://openalex.org/C2777212361","wikidata":"https://www.wikidata.org/wiki/Q5127848","display_name":"Class (philosophy)","level":2,"score":0.4691570997238159},{"id":"https://openalex.org/C189950617","wikidata":"https://www.wikidata.org/wiki/Q937228","display_name":"Property (philosophy)","level":2,"score":0.41659659147262573},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C158622935","wikidata":"https://www.wikidata.org/wiki/Q660848","display_name":"Nonlinear system","level":2,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C111472728","wikidata":"https://www.wikidata.org/wiki/Q9471","display_name":"Epistemology","level":1,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/icra48506.2021.9561253","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icra48506.2021.9561253","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2021 IEEE International Conference on Robotics and Automation (ICRA)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":63,"referenced_works":["https://openalex.org/W1580561995","https://openalex.org/W1591465481","https://openalex.org/W1746819321","https://openalex.org/W1845972764","https://openalex.org/W1882576690","https://openalex.org/W1891170951","https://openalex.org/W1972149633","https://openalex.org/W1988538197","https://openalex.org/W1999874108","https://openalex.org/W2033118636","https://openalex.org/W2130178506","https://openalex.org/W2134491302","https://openalex.org/W2136192377","https://openalex.org/W2140135625","https://openalex.org/W2172184261","https://openalex.org/W2234962923","https://openalex.org/W2525042068","https://openalex.org/W2588802774","https://openalex.org/W2735010720","https://openalex.org/W2765203865","https://openalex.org/W2788084076","https://openalex.org/W2889711700","https://openalex.org/W2892521964","https://openalex.org/W2905111361","https://openalex.org/W2923132828","https://openalex.org/W2952720101","https://openalex.org/W2962803570","https://openalex.org/W2963148914","https://openalex.org/W2963286335","https://openalex.org/W2963525569","https://openalex.org/W2966735560","https://openalex.org/W2968945909","https://openalex.org/W2970782456","https://openalex.org/W2972429904","https://openalex.org/W2996196387","https://openalex.org/W3001339198","https://openalex.org/W3005974372","https://openalex.org/W3006363489","https://openalex.org/W3010220686","https://openalex.org/W3010871414","https://openalex.org/W3011888914","https://openalex.org/W3016534272","https://openalex.org/W3038180127","https://openalex.org/W3045838296","https://openalex.org/W3099352109","https://openalex.org/W3106002372","https://openalex.org/W3117756863","https://openalex.org/W3163376802","https://openalex.org/W4211049957","https://openalex.org/W4288336312","https://openalex.org/W4293545785","https://openalex.org/W4297780563","https://openalex.org/W6680657880","https://openalex.org/W6682367392","https://openalex.org/W6737893269","https://openalex.org/W6748460798","https://openalex.org/W6757787546","https://openalex.org/W6760359693","https://openalex.org/W6763216130","https://openalex.org/W6772100842","https://openalex.org/W6774062799","https://openalex.org/W6774836160","https://openalex.org/W7005955626"],"related_works":["https://openalex.org/W962423920","https://openalex.org/W2387968248","https://openalex.org/W2350715914","https://openalex.org/W2120821724","https://openalex.org/W2379529020","https://openalex.org/W2101175215","https://openalex.org/W2893549521","https://openalex.org/W1562157883","https://openalex.org/W2330311678","https://openalex.org/W2352322616"],"abstract_inverted_index":{"Safety":[0,12],"is":[1,13,125],"a":[2,51,88,128,132],"critical":[3],"property":[4],"in":[5,16,21],"applications":[6],"including":[7],"robotics,":[8],"transportation,":[9],"and":[10,97,135],"energy.":[11],"especially":[14],"challenging":[15],"reinforcement":[17],"learning":[18],"(RL)":[19],"settings,":[20],"which":[22,38],"uncertainty":[23,96],"of":[24,66,91,131],"the":[25,43,64],"system":[26,134,139],"dynamics":[27],"may":[28],"cause":[29],"safety":[30,40,65,100,115],"violations":[31],"during":[32],"exploration.":[33],"Control":[34,84],"Barrier":[35,85],"Functions":[36,86],"(CBFs),":[37],"enforce":[39],"by":[41,116],"constraining":[42],"control":[44],"actions":[45],"at":[46],"each":[47],"time":[48],"step,":[49],"are":[50],"promising":[52],"approach":[53,124],"for":[54,110],"safety-critical":[55],"control.":[56],"This":[57],"technique":[58],"has":[59,71],"been":[60,73],"applied":[61],"to":[62,93,113,142],"ensure":[63],"model-free":[67],"RL,":[68],"however,":[69],"it":[70],"not":[72],"integrated":[74],"into":[75],"model-based":[76,111],"RL.":[77],"In":[78],"this":[79],"paper,":[80],"we":[81,106],"propose":[82],"Uncertainty-Tolerant":[83],"(UTCBFs),":[87],"new":[89],"class":[90],"CBFs":[92,118],"incorporate":[94],"model":[95],"provide":[98],"provable":[99],"guarantees":[101],"with":[102,119,140],"desired":[103],"probability.":[104],"Furthermore,":[105],"introduce":[107],"an":[108,136],"algorithm":[109],"RL":[112,144],"guarantee":[114],"integrating":[117],"gradient-based":[120],"policy":[121],"search.":[122],"Our":[123],"verified":[126],"through":[127],"numerical":[129],"study":[130],"cart-pole":[133],"inverted":[137],"pendulum":[138],"comparison":[141],"state-of-the-art":[143],"algorithms.":[145]},"counts_by_year":[{"year":2025,"cited_by_count":6},{"year":2024,"cited_by_count":1},{"year":2023,"cited_by_count":1},{"year":2022,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
