{"id":"https://openalex.org/W4415354090","doi":"https://doi.org/10.1145/3715958","title":"Shields for Safe Reinforcement Learning","display_name":"Shields for Safe Reinforcement Learning","publication_year":2025,"publication_date":"2025-10-20","ids":{"openalex":"https://openalex.org/W4415354090","doi":"https://doi.org/10.1145/3715958"},"language":"en","primary_location":{"id":"doi:10.1145/3715958","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3715958","pdf_url":null,"source":{"id":"https://openalex.org/S103482838","display_name":"Communications of the ACM","issn_l":"0001-0782","issn":["0001-0782","1557-7317"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Communications of the ACM","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"hybrid","oa_url":"https://doi.org/10.1145/3715958","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5042655793","display_name":"Bettina K\u00f6nighofer","orcid":"https://orcid.org/0000-0001-5183-5452"},"institutions":[{"id":"https://openalex.org/I4092182","display_name":"Graz University of Technology","ror":"https://ror.org/00d7xrm67","country_code":"AT","type":"education","lineage":["https://openalex.org/I4092182"]}],"countries":["AT"],"is_corresponding":true,"raw_author_name":"Bettina K\u00f6nighofer","raw_affiliation_strings":["Graz University of Technology, Graz, Austria"],"affiliations":[{"raw_affiliation_string":"Graz University of Technology, Graz, Austria","institution_ids":["https://openalex.org/I4092182"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5053520475","display_name":"Roderick Bloem","orcid":"https://orcid.org/0000-0002-1411-5744"},"institutions":[{"id":"https://openalex.org/I4092182","display_name":"Graz University of Technology","ror":"https://ror.org/00d7xrm67","country_code":"AT","type":"education","lineage":["https://openalex.org/I4092182"]}],"countries":["AT"],"is_corresponding":false,"raw_author_name":"Roderick Bloem","raw_affiliation_strings":["Graz University of Technology, Graz, Austria"],"affiliations":[{"raw_affiliation_string":"Graz University of Technology, Graz, Austria","institution_ids":["https://openalex.org/I4092182"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5012669709","display_name":"Nils Jansen","orcid":"https://orcid.org/0000-0003-1318-8973"},"institutions":[{"id":"https://openalex.org/I145872427","display_name":"Radboud University Nijmegen","ror":"https://ror.org/016xsfp80","country_code":"NL","type":"education","lineage":["https://openalex.org/I145872427"]},{"id":"https://openalex.org/I904495901","display_name":"Ruhr University Bochum","ror":"https://ror.org/04tsk2644","country_code":"DE","type":"education","lineage":["https://openalex.org/I904495901"]}],"countries":["DE","NL"],"is_corresponding":false,"raw_author_name":"Nils Jansen","raw_affiliation_strings":["Radboud Universiteit, Nijmegen, Netherlands","Ruhr-Universitat Bochum, Bochum, Germany"],"affiliations":[{"raw_affiliation_string":"Radboud Universiteit, Nijmegen, Netherlands","institution_ids":["https://openalex.org/I145872427"]},{"raw_affiliation_string":"Ruhr-Universitat Bochum, Bochum, Germany","institution_ids":["https://openalex.org/I904495901"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5018941708","display_name":"Sebastian Junges","orcid":"https://orcid.org/0000-0003-0978-8466"},"institutions":[{"id":"https://openalex.org/I145872427","display_name":"Radboud University Nijmegen","ror":"https://ror.org/016xsfp80","country_code":"NL","type":"education","lineage":["https://openalex.org/I145872427"]}],"countries":["NL"],"is_corresponding":false,"raw_author_name":"Sebastian Junges","raw_affiliation_strings":["Radboud Universiteit, Nijmegen, Netherlands"],"affiliations":[{"raw_affiliation_string":"Radboud Universiteit, Nijmegen, Netherlands","institution_ids":["https://openalex.org/I145872427"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5027153021","display_name":"Stefan Pranger","orcid":"https://orcid.org/0009-0000-6011-9925"},"institutions":[{"id":"https://openalex.org/I4092182","display_name":"Graz University of Technology","ror":"https://ror.org/00d7xrm67","country_code":"AT","type":"education","lineage":["https://openalex.org/I4092182"]}],"countries":["AT"],"is_corresponding":false,"raw_author_name":"Stefan Pranger","raw_affiliation_strings":["Graz University of Technology, Graz, Austria"],"affiliations":[{"raw_affiliation_string":"Graz University of Technology, Graz, Austria","institution_ids":["https://openalex.org/I4092182"]}]}],"institutions":[],"countries_distinct_count":3,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5042655793"],"corresponding_institution_ids":["https://openalex.org/I4092182"],"apc_list":null,"apc_paid":null,"fwci":4.4566,"has_fulltext":true,"cited_by_count":2,"citation_normalized_percentile":{"value":0.9496369,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":91,"max":98},"biblio":{"volume":"68","issue":"11","first_page":"80","last_page":"90"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10653","display_name":"Robot Manipulation and Learning","score":0.9958000183105469,"subfield":{"id":"https://openalex.org/subfields/2207","display_name":"Control and Systems Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10142","display_name":"Formal Methods in Verification","score":0.9952999949455261,"subfield":{"id":"https://openalex.org/subfields/1703","display_name":"Computational Theory and Mathematics"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.8253999948501587},{"id":"https://openalex.org/keywords/shields","display_name":"Shields","score":0.713100016117096},{"id":"https://openalex.org/keywords/process","display_name":"Process (computing)","score":0.5997999906539917},{"id":"https://openalex.org/keywords/popularity","display_name":"Popularity","score":0.42910000681877136},{"id":"https://openalex.org/keywords/enforcement","display_name":"Enforcement","score":0.4077000021934509}],"concepts":[{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.8253999948501587},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7889999747276306},{"id":"https://openalex.org/C109589588","wikidata":"https://www.wikidata.org/wiki/Q2253638","display_name":"Shields","level":3,"score":0.713100016117096},{"id":"https://openalex.org/C98045186","wikidata":"https://www.wikidata.org/wiki/Q205663","display_name":"Process (computing)","level":2,"score":0.5997999906539917},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4620000123977661},{"id":"https://openalex.org/C2780586970","wikidata":"https://www.wikidata.org/wiki/Q1357284","display_name":"Popularity","level":2,"score":0.42910000681877136},{"id":"https://openalex.org/C2779777834","wikidata":"https://www.wikidata.org/wiki/Q4202277","display_name":"Enforcement","level":2,"score":0.4077000021934509},{"id":"https://openalex.org/C77590175","wikidata":"https://www.wikidata.org/wiki/Q3506009","display_name":"Shielded cable","level":2,"score":0.4000999927520752},{"id":"https://openalex.org/C2776179734","wikidata":"https://www.wikidata.org/wiki/Q7398668","display_name":"Safety standards","level":2,"score":0.3452000021934509},{"id":"https://openalex.org/C77967617","wikidata":"https://www.wikidata.org/wiki/Q4677561","display_name":"Active learning (machine learning)","level":2,"score":0.32280001044273376},{"id":"https://openalex.org/C133731056","wikidata":"https://www.wikidata.org/wiki/Q4917288","display_name":"Control engineering","level":1,"score":0.27880001068115234},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.26930001378059387}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1145/3715958","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3715958","pdf_url":null,"source":{"id":"https://openalex.org/S103482838","display_name":"Communications of the ACM","issn_l":"0001-0782","issn":["0001-0782","1557-7317"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Communications of the ACM","raw_type":"journal-article"},{"id":"pmh:oai:repository.ubn.ru.nl:2066/325236","is_oa":true,"landing_page_url":"https://hdl.handle.net/2066/325236","pdf_url":"https://repository.ubn.ru.nl/bitstream/handle/2066/325236/1/325236.pdf","source":{"id":"https://openalex.org/S4306401067","display_name":"Radboud Repository (Radboud University)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I145872427","host_organization_name":"Radboud University Nijmegen","host_organization_lineage":["https://openalex.org/I145872427"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"Article / Letter to editor"}],"best_oa_location":{"id":"doi:10.1145/3715958","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3715958","pdf_url":null,"source":{"id":"https://openalex.org/S103482838","display_name":"Communications of the ACM","issn_l":"0001-0782","issn":["0001-0782","1557-7317"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Communications of the ACM","raw_type":"journal-article"},"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G8680849391","display_name":null,"funder_award_id":"W1255","funder_id":"https://openalex.org/F4320321181","funder_display_name":"Austrian Science Fund"}],"funders":[{"id":"https://openalex.org/F4320321181","display_name":"Austrian Science Fund","ror":"https://ror.org/013tf3c58"},{"id":"https://openalex.org/F4320321800","display_name":"Nederlandse Organisatie voor Wetenschappelijk Onderzoek","ror":"https://ror.org/04jsz6e67"},{"id":"https://openalex.org/F4320326047","display_name":"Amt der Steierm\u00e4rkischen Landesregierung","ror":null}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":28,"referenced_works":["https://openalex.org/W1498432697","https://openalex.org/W1977070092","https://openalex.org/W1979349468","https://openalex.org/W2145339207","https://openalex.org/W2222789563","https://openalex.org/W2279014940","https://openalex.org/W2749604329","https://openalex.org/W2763611255","https://openalex.org/W2787908307","https://openalex.org/W2912198824","https://openalex.org/W2912640545","https://openalex.org/W2962295100","https://openalex.org/W2963525569","https://openalex.org/W2963530628","https://openalex.org/W2972769943","https://openalex.org/W3003931103","https://openalex.org/W3027341120","https://openalex.org/W3081016801","https://openalex.org/W3100789280","https://openalex.org/W3186483141","https://openalex.org/W3189250457","https://openalex.org/W3202626246","https://openalex.org/W3216772467","https://openalex.org/W4214717370","https://openalex.org/W4231208998","https://openalex.org/W4283212640","https://openalex.org/W4367664943","https://openalex.org/W4382318502"],"related_works":[],"abstract_inverted_index":{"Reinforcement":[0],"learning":[1,7,31],"(RL)":[2],"is":[3],"a":[4,38],"prominent":[5],"machine":[6],"technique":[8],"used":[9],"to":[10],"optimize":[11],"an":[12],"agent\u2019s":[13],"performance":[14],"in":[15,93],"potentially":[16],"unknown":[17],"environments.":[18],"Despite":[19],"its":[20],"popularity":[21],"and":[22,33,63,82,88],"success,":[23],"RL":[24],"lacks":[25],"safety":[26,47],"guarantees,":[27],"both":[28],"during":[29],"the":[30,52,55,64,80,90],"phase":[32],"deployment.":[34],"This":[35],"paper":[36],"reviews":[37],"runtime":[39],"enforcement":[40],"method":[41],"called":[42],"shielding":[43],"that":[44,59],"ensures":[45],"provable":[46],"for":[48,74],"RL.":[49],"We":[50],"describe":[51,71],"underlying":[53],"models,":[54],"types":[56],"of":[57,66,85],"guarantees":[58],"can":[60],"be":[61],"delivered,":[62],"process":[65],"computing":[67],"shields.":[68],"Furthermore,":[69],"we":[70],"several":[72],"techniques":[73],"integrating":[75],"shields":[76],"into":[77],"RL,":[78],"discuss":[79],"advantages":[81],"potential":[83],"drawbacks":[84],"this":[86],"integration,":[87],"highlight":[89],"current":[91],"challenges":[92],"shielded":[94],"learning.":[95]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":1}],"updated_date":"2026-04-21T08:09:41.155169","created_date":"2025-10-21T00:00:00"}
