{"id":"https://openalex.org/W4389665566","doi":"https://doi.org/10.1109/iros55552.2023.10342464","title":"Reducing Safety Interventions in Provably Safe Reinforcement Learning","display_name":"Reducing Safety Interventions in Provably Safe Reinforcement Learning","publication_year":2023,"publication_date":"2023-10-01","ids":{"openalex":"https://openalex.org/W4389665566","doi":"https://doi.org/10.1109/iros55552.2023.10342464"},"language":"en","primary_location":{"id":"doi:10.1109/iros55552.2023.10342464","is_oa":false,"landing_page_url":"https://doi.org/10.1109/iros55552.2023.10342464","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2023 IEEE/RSJ International Conference on Intelligent Robots and Systems (IROS)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5025142940","display_name":"Jakob Thumm","orcid":"https://orcid.org/0000-0003-0282-2908"},"institutions":[{"id":"https://openalex.org/I62916508","display_name":"Technical University of Munich","ror":"https://ror.org/02kkvpp62","country_code":"DE","type":"education","lineage":["https://openalex.org/I62916508"]}],"countries":["DE"],"is_corresponding":true,"raw_author_name":"Jakob Thumm","raw_affiliation_strings":["School of Informatics, Technical University of Munich,Garching,Germany,85748"],"affiliations":[{"raw_affiliation_string":"School of Informatics, Technical University of Munich,Garching,Germany,85748","institution_ids":["https://openalex.org/I62916508"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5014203264","display_name":"Guillaume Pelat","orcid":null},"institutions":[{"id":"https://openalex.org/I62916508","display_name":"Technical University of Munich","ror":"https://ror.org/02kkvpp62","country_code":"DE","type":"education","lineage":["https://openalex.org/I62916508"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Guillaume Pelat","raw_affiliation_strings":["School of Informatics, Technical University of Munich,Garching,Germany,85748"],"affiliations":[{"raw_affiliation_string":"School of Informatics, Technical University of Munich,Garching,Germany,85748","institution_ids":["https://openalex.org/I62916508"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5005383495","display_name":"Matthias Althoff","orcid":"https://orcid.org/0000-0003-3733-842X"},"institutions":[{"id":"https://openalex.org/I62916508","display_name":"Technical University of Munich","ror":"https://ror.org/02kkvpp62","country_code":"DE","type":"education","lineage":["https://openalex.org/I62916508"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Matthias Althoff","raw_affiliation_strings":["School of Informatics, Technical University of Munich,Garching,Germany,85748"],"affiliations":[{"raw_affiliation_string":"School of Informatics, Technical University of Munich,Garching,Germany,85748","institution_ids":["https://openalex.org/I62916508"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5025142940"],"corresponding_institution_ids":["https://openalex.org/I62916508"],"apc_list":null,"apc_paid":null,"fwci":0.5237,"has_fulltext":false,"cited_by_count":3,"citation_normalized_percentile":{"value":0.7285043,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":96},"biblio":{"volume":null,"issue":null,"first_page":"7515","last_page":"7522"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9993000030517578,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9993000030517578,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10653","display_name":"Robot Manipulation and Learning","score":0.9955999851226807,"subfield":{"id":"https://openalex.org/subfields/2207","display_name":"Control and Systems Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11099","display_name":"Autonomous Vehicle Technology and Safety","score":0.9940999746322632,"subfield":{"id":"https://openalex.org/subfields/2203","display_name":"Automotive Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.8705571889877319},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.6645413637161255},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6425632238388062},{"id":"https://openalex.org/keywords/benchmark","display_name":"Benchmark (surveying)","score":0.6244587898254395},{"id":"https://openalex.org/keywords/intervention","display_name":"Intervention (counseling)","score":0.509067952632904},{"id":"https://openalex.org/keywords/robot","display_name":"Robot","score":0.45433762669563293},{"id":"https://openalex.org/keywords/action","display_name":"Action (physics)","score":0.4519124925136566},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.3529210090637207},{"id":"https://openalex.org/keywords/risk-analysis","display_name":"Risk analysis (engineering)","score":0.34514299035072327},{"id":"https://openalex.org/keywords/engineering","display_name":"Engineering","score":0.2583965063095093},{"id":"https://openalex.org/keywords/systems-engineering","display_name":"Systems engineering","score":0.08304864168167114}],"concepts":[{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.8705571889877319},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.6645413637161255},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6425632238388062},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.6244587898254395},{"id":"https://openalex.org/C2780665704","wikidata":"https://www.wikidata.org/wiki/Q959298","display_name":"Intervention (counseling)","level":2,"score":0.509067952632904},{"id":"https://openalex.org/C90509273","wikidata":"https://www.wikidata.org/wiki/Q11012","display_name":"Robot","level":2,"score":0.45433762669563293},{"id":"https://openalex.org/C2780791683","wikidata":"https://www.wikidata.org/wiki/Q846785","display_name":"Action (physics)","level":2,"score":0.4519124925136566},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.3529210090637207},{"id":"https://openalex.org/C112930515","wikidata":"https://www.wikidata.org/wiki/Q4389547","display_name":"Risk analysis (engineering)","level":1,"score":0.34514299035072327},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.2583965063095093},{"id":"https://openalex.org/C201995342","wikidata":"https://www.wikidata.org/wiki/Q682496","display_name":"Systems engineering","level":1,"score":0.08304864168167114},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C15744967","wikidata":"https://www.wikidata.org/wiki/Q9418","display_name":"Psychology","level":0,"score":0.0},{"id":"https://openalex.org/C118552586","wikidata":"https://www.wikidata.org/wiki/Q7867","display_name":"Psychiatry","level":1,"score":0.0},{"id":"https://openalex.org/C205649164","wikidata":"https://www.wikidata.org/wiki/Q1071","display_name":"Geography","level":0,"score":0.0},{"id":"https://openalex.org/C13280743","wikidata":"https://www.wikidata.org/wiki/Q131089","display_name":"Geodesy","level":1,"score":0.0},{"id":"https://openalex.org/C71924100","wikidata":"https://www.wikidata.org/wiki/Q11190","display_name":"Medicine","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/iros55552.2023.10342464","is_oa":false,"landing_page_url":"https://doi.org/10.1109/iros55552.2023.10342464","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2023 IEEE/RSJ International Conference on Intelligent Robots and Systems (IROS)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":39,"referenced_works":["https://openalex.org/W41554520","https://openalex.org/W1845972764","https://openalex.org/W2027579135","https://openalex.org/W2142224528","https://openalex.org/W2583993537","https://openalex.org/W2593845678","https://openalex.org/W2752162720","https://openalex.org/W2772946107","https://openalex.org/W2787908307","https://openalex.org/W2809695613","https://openalex.org/W2963293747","https://openalex.org/W2963428623","https://openalex.org/W2963525569","https://openalex.org/W2963575966","https://openalex.org/W2966735560","https://openalex.org/W2990747716","https://openalex.org/W3090270894","https://openalex.org/W3100944043","https://openalex.org/W3106180689","https://openalex.org/W3115737827","https://openalex.org/W3120090707","https://openalex.org/W3122928565","https://openalex.org/W3127561923","https://openalex.org/W3135301907","https://openalex.org/W3142005321","https://openalex.org/W3156919398","https://openalex.org/W3158253560","https://openalex.org/W3159199672","https://openalex.org/W3176452384","https://openalex.org/W3178194471","https://openalex.org/W3186708588","https://openalex.org/W3195968524","https://openalex.org/W4250589301","https://openalex.org/W4285102237","https://openalex.org/W4285102519","https://openalex.org/W6684921986","https://openalex.org/W6737893269","https://openalex.org/W6738483526","https://openalex.org/W6780587392"],"related_works":["https://openalex.org/W2378211422","https://openalex.org/W2745001401","https://openalex.org/W4321353415","https://openalex.org/W2130974462","https://openalex.org/W972276598","https://openalex.org/W4246352526","https://openalex.org/W2028665553","https://openalex.org/W4230315250","https://openalex.org/W2110944602","https://openalex.org/W3213722473"],"abstract_inverted_index":{"Deep":[0],"Reinforcement":[1],"Learning":[2],"(RL)":[3],"has":[4],"shown":[5],"promise":[6],"in":[7,38],"addressing":[8],"complex":[9],"robotic":[10,135],"challenges.":[11],"In":[12],"real-world":[13,134],"applications,":[14],"RL":[15,83,107],"is":[16],"often":[17],"accompanied":[18],"by":[19],"failsafe":[20,74,121],"controllers":[21],"as":[22,42],"a":[23,72,91,117,130],"last":[24],"resort":[25],"to":[26,71,80,109,129],"avoid":[27],"catastrophic":[28],"events.":[29],"While":[30],"necessary":[31],"for":[32],"safety,":[33],"these":[34],"interventions":[35],"can":[36,126],"result":[37],"undesirable":[39],"behaviors,":[40],"such":[41],"abrupt":[43],"braking":[44],"or":[45],"aggressive":[46],"steering.":[47],"This":[48],"paper":[49],"proposes":[50],"two":[51],"safety":[52,87,114,140],"intervention":[53],"reduction":[54],"methods:":[55],"proactive":[56,59],"replacement":[57],"and":[58,90,116],"projection,":[60],"which":[61],"change":[62],"the":[63,66,85,99],"action":[64],"of":[65,101,120,133],"agent":[67],"if":[68],"it":[69],"leads":[70,108],"potential":[73],"intervention.":[75],"These":[76],"approaches":[77],"are":[78],"compared":[79],"state-of-the-art":[81],"constrained":[82],"on":[84],"OpenAI":[86],"gym":[88],"benchmark":[89],"human-robot":[92],"collab-oration":[93],"task.":[94],"Our":[95,123],"study":[96],"demonstrates":[97],"that":[98],"combination":[100],"our":[102],"method":[103,125],"with":[104,112],"provably":[105],"safe":[106],"high-performing":[110],"policies":[111],"zero":[113],"violations":[115],"low":[118],"number":[119],"interventions.":[122],"versatile":[124],"be":[127],"applied":[128],"wide":[131],"range":[132],"tasks,":[136],"while":[137],"effectively":[138],"improving":[139],"without":[141],"sacrificing":[142],"task":[143],"performance.":[144]},"counts_by_year":[{"year":2025,"cited_by_count":2},{"year":2024,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}