{"id":"https://openalex.org/W4296823901","doi":"https://doi.org/10.1007/s11334-022-00480-4","title":"Online shielding for reinforcement learning","display_name":"Online shielding for reinforcement learning","publication_year":2022,"publication_date":"2022-09-23","ids":{"openalex":"https://openalex.org/W4296823901","doi":"https://doi.org/10.1007/s11334-022-00480-4"},"language":"en","primary_location":{"id":"doi:10.1007/s11334-022-00480-4","is_oa":true,"landing_page_url":"https://doi.org/10.1007/s11334-022-00480-4","pdf_url":null,"source":{"id":"https://openalex.org/S151239915","display_name":"Innovations in Systems and Software Engineering","issn_l":"1614-5046","issn":["1614-5046","1614-5054"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319900","host_organization_name":"Springer Science+Business Media","host_organization_lineage":["https://openalex.org/P4310319900","https://openalex.org/P4310319965"],"host_organization_lineage_names":["Springer Science+Business Media","Springer Nature"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Innovations in Systems and Software Engineering","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"hybrid","oa_url":"https://doi.org/10.1007/s11334-022-00480-4","any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5042655793","display_name":"Bettina K\u00f6nighofer","orcid":"https://orcid.org/0000-0001-5183-5452"},"institutions":[{"id":"https://openalex.org/I4092182","display_name":"Graz University of Technology","ror":"https://ror.org/00d7xrm67","country_code":"AT","type":"education","lineage":["https://openalex.org/I4092182"]}],"countries":["AT"],"is_corresponding":true,"raw_author_name":"Bettina K\u00f6nighofer","raw_affiliation_strings":["Institute of Applied Information Processing and Communications, Graz University of Technology, Graz, Austria","Lamarr Security Research, Graz, Austria"],"raw_orcid":"https://orcid.org/0000-0001-5183-5452","affiliations":[{"raw_affiliation_string":"Institute of Applied Information Processing and Communications, Graz University of Technology, Graz, Austria","institution_ids":["https://openalex.org/I4092182"]},{"raw_affiliation_string":"Lamarr Security Research, Graz, Austria","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5007191804","display_name":"Julian Rudolf","orcid":null},"institutions":[{"id":"https://openalex.org/I4092182","display_name":"Graz University of Technology","ror":"https://ror.org/00d7xrm67","country_code":"AT","type":"education","lineage":["https://openalex.org/I4092182"]}],"countries":["AT"],"is_corresponding":false,"raw_author_name":"Julian Rudolf","raw_affiliation_strings":["Institute of Applied Information Processing and Communications, Graz University of Technology, Graz, Austria"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Institute of Applied Information Processing and Communications, Graz University of Technology, Graz, Austria","institution_ids":["https://openalex.org/I4092182"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5076759008","display_name":"Alexander Palmisano","orcid":null},"institutions":[{"id":"https://openalex.org/I4092182","display_name":"Graz University of Technology","ror":"https://ror.org/00d7xrm67","country_code":"AT","type":"education","lineage":["https://openalex.org/I4092182"]}],"countries":["AT"],"is_corresponding":false,"raw_author_name":"Alexander Palmisano","raw_affiliation_strings":["Institute of Applied Information Processing and Communications, Graz University of Technology, Graz, Austria"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Institute of Applied Information Processing and Communications, Graz University of Technology, Graz, Austria","institution_ids":["https://openalex.org/I4092182"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5078635542","display_name":"Martin Tappler","orcid":"https://orcid.org/0000-0002-4193-5609"},"institutions":[{"id":"https://openalex.org/I4092182","display_name":"Graz University of Technology","ror":"https://ror.org/00d7xrm67","country_code":"AT","type":"education","lineage":["https://openalex.org/I4092182"]},{"id":"https://openalex.org/I4210123126","display_name":"Silicon Austria Labs (Austria)","ror":"https://ror.org/03b1qgn79","country_code":"AT","type":"company","lineage":["https://openalex.org/I4210123126"]}],"countries":["AT"],"is_corresponding":false,"raw_author_name":"Martin Tappler","raw_affiliation_strings":["Institute of Software Technology, Graz University of Technology, Graz, Austria","TU Graz-SAL DES Lab, Silicon Austria Labs, Graz, Austria"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Institute of Software Technology, Graz University of Technology, Graz, Austria","institution_ids":["https://openalex.org/I4092182"]},{"raw_affiliation_string":"TU Graz-SAL DES Lab, Silicon Austria Labs, Graz, Austria","institution_ids":["https://openalex.org/I4210123126","https://openalex.org/I4092182"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5053520475","display_name":"Roderick Bloem","orcid":"https://orcid.org/0000-0002-1411-5744"},"institutions":[{"id":"https://openalex.org/I4092182","display_name":"Graz University of Technology","ror":"https://ror.org/00d7xrm67","country_code":"AT","type":"education","lineage":["https://openalex.org/I4092182"]}],"countries":["AT"],"is_corresponding":false,"raw_author_name":"Roderick Bloem","raw_affiliation_strings":["Institute of Applied Information Processing and Communications, Graz University of Technology, Graz, Austria"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Institute of Applied Information Processing and Communications, Graz University of Technology, Graz, Austria","institution_ids":["https://openalex.org/I4092182"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5042655793"],"corresponding_institution_ids":["https://openalex.org/I4092182"],"apc_list":{"value":2190,"currency":"EUR","value_usd":2790},"apc_paid":{"value":2190,"currency":"EUR","value_usd":2790},"fwci":2.8902,"has_fulltext":false,"cited_by_count":25,"citation_normalized_percentile":{"value":0.92025547,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":96,"max":100},"biblio":{"volume":"19","issue":"4","first_page":"379","last_page":"394"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10142","display_name":"Formal Methods in Verification","score":0.9983999729156494,"subfield":{"id":"https://openalex.org/subfields/1703","display_name":"Computational Theory and Mathematics"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10142","display_name":"Formal Methods in Verification","score":0.9983999729156494,"subfield":{"id":"https://openalex.org/subfields/1703","display_name":"Computational Theory and Mathematics"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9976999759674072,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11689","display_name":"Adversarial Robustness in Machine Learning","score":0.9970999956130981,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7119351625442505},{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.7068852782249451},{"id":"https://openalex.org/keywords/electromagnetic-shielding","display_name":"Electromagnetic shielding","score":0.6496133208274841},{"id":"https://openalex.org/keywords/computation","display_name":"Computation","score":0.5894629955291748},{"id":"https://openalex.org/keywords/markov-decision-process","display_name":"Markov decision process","score":0.5570482015609741},{"id":"https://openalex.org/keywords/intuition","display_name":"Intuition","score":0.5124168992042542},{"id":"https://openalex.org/keywords/action","display_name":"Action (physics)","score":0.5029239058494568},{"id":"https://openalex.org/keywords/markov-chain","display_name":"Markov chain","score":0.4718729853630066},{"id":"https://openalex.org/keywords/block","display_name":"Block (permutation group theory)","score":0.4400441348552704},{"id":"https://openalex.org/keywords/set","display_name":"Set (abstract data type)","score":0.4341672658920288},{"id":"https://openalex.org/keywords/markov-process","display_name":"Markov process","score":0.36057305335998535},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.33693140745162964},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.21153420209884644},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.19497331976890564},{"id":"https://openalex.org/keywords/engineering","display_name":"Engineering","score":0.16700947284698486},{"id":"https://openalex.org/keywords/programming-language","display_name":"Programming language","score":0.11694204807281494},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.09623837471008301}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7119351625442505},{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.7068852782249451},{"id":"https://openalex.org/C2265751","wikidata":"https://www.wikidata.org/wiki/Q332007","display_name":"Electromagnetic shielding","level":2,"score":0.6496133208274841},{"id":"https://openalex.org/C45374587","wikidata":"https://www.wikidata.org/wiki/Q12525525","display_name":"Computation","level":2,"score":0.5894629955291748},{"id":"https://openalex.org/C106189395","wikidata":"https://www.wikidata.org/wiki/Q176789","display_name":"Markov decision process","level":3,"score":0.5570482015609741},{"id":"https://openalex.org/C132010649","wikidata":"https://www.wikidata.org/wiki/Q189222","display_name":"Intuition","level":2,"score":0.5124168992042542},{"id":"https://openalex.org/C2780791683","wikidata":"https://www.wikidata.org/wiki/Q846785","display_name":"Action (physics)","level":2,"score":0.5029239058494568},{"id":"https://openalex.org/C98763669","wikidata":"https://www.wikidata.org/wiki/Q176645","display_name":"Markov chain","level":2,"score":0.4718729853630066},{"id":"https://openalex.org/C2777210771","wikidata":"https://www.wikidata.org/wiki/Q4927124","display_name":"Block (permutation group theory)","level":2,"score":0.4400441348552704},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.4341672658920288},{"id":"https://openalex.org/C159886148","wikidata":"https://www.wikidata.org/wiki/Q176645","display_name":"Markov process","level":2,"score":0.36057305335998535},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.33693140745162964},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.21153420209884644},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.19497331976890564},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.16700947284698486},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.11694204807281494},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.09623837471008301},{"id":"https://openalex.org/C119599485","wikidata":"https://www.wikidata.org/wiki/Q43035","display_name":"Electrical engineering","level":1,"score":0.0},{"id":"https://openalex.org/C105795698","wikidata":"https://www.wikidata.org/wiki/Q12483","display_name":"Statistics","level":1,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C2524010","wikidata":"https://www.wikidata.org/wiki/Q8087","display_name":"Geometry","level":1,"score":0.0},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C111472728","wikidata":"https://www.wikidata.org/wiki/Q9471","display_name":"Epistemology","level":1,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1007/s11334-022-00480-4","is_oa":true,"landing_page_url":"https://doi.org/10.1007/s11334-022-00480-4","pdf_url":null,"source":{"id":"https://openalex.org/S151239915","display_name":"Innovations in Systems and Software Engineering","issn_l":"1614-5046","issn":["1614-5046","1614-5054"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319900","host_organization_name":"Springer Science+Business Media","host_organization_lineage":["https://openalex.org/P4310319900","https://openalex.org/P4310319965"],"host_organization_lineage_names":["Springer Science+Business Media","Springer Nature"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Innovations in Systems and Software Engineering","raw_type":"journal-article"}],"best_oa_location":{"id":"doi:10.1007/s11334-022-00480-4","is_oa":true,"landing_page_url":"https://doi.org/10.1007/s11334-022-00480-4","pdf_url":null,"source":{"id":"https://openalex.org/S151239915","display_name":"Innovations in Systems and Software Engineering","issn_l":"1614-5046","issn":["1614-5046","1614-5054"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319900","host_organization_name":"Springer Science+Business Media","host_organization_lineage":["https://openalex.org/P4310319900","https://openalex.org/P4310319965"],"host_organization_lineage_names":["Springer Science+Business Media","Springer Nature"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Innovations in Systems and Software Engineering","raw_type":"journal-article"},"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G7329227181","display_name":null,"funder_award_id":"956123","funder_id":"https://openalex.org/F4320335254","funder_display_name":"Horizon 2020"}],"funders":[{"id":"https://openalex.org/F4320310430","display_name":"TU Graz, Internationale Beziehungen und Mobilit\u00e4tsprogramme","ror":"https://ror.org/00d7xrm67"},{"id":"https://openalex.org/F4320318209","display_name":"Silicon Austria Labs","ror":"https://ror.org/03b1qgn79"},{"id":"https://openalex.org/F4320335254","display_name":"Horizon 2020","ror":null}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":33,"referenced_works":["https://openalex.org/W1498432697","https://openalex.org/W1862398452","https://openalex.org/W2023808162","https://openalex.org/W2098543914","https://openalex.org/W2257979135","https://openalex.org/W2406438973","https://openalex.org/W2527393991","https://openalex.org/W2593267253","https://openalex.org/W2787908307","https://openalex.org/W2800727637","https://openalex.org/W2889711700","https://openalex.org/W2895196950","https://openalex.org/W2912757393","https://openalex.org/W2962295100","https://openalex.org/W2963530628","https://openalex.org/W2963575966","https://openalex.org/W2966271811","https://openalex.org/W2966735560","https://openalex.org/W3091691175","https://openalex.org/W3093055384","https://openalex.org/W3099352109","https://openalex.org/W3102039646","https://openalex.org/W3104303413","https://openalex.org/W3132992873","https://openalex.org/W3135325966","https://openalex.org/W3154545355","https://openalex.org/W3159199672","https://openalex.org/W3184164189","https://openalex.org/W3186483141","https://openalex.org/W4206634491","https://openalex.org/W4214717370","https://openalex.org/W4293257530","https://openalex.org/W6600009415"],"related_works":["https://openalex.org/W3096874164","https://openalex.org/W2937181779","https://openalex.org/W2386410636","https://openalex.org/W1985560493","https://openalex.org/W2357975469","https://openalex.org/W2145363145","https://openalex.org/W1626977535","https://openalex.org/W4284974072","https://openalex.org/W2341346307","https://openalex.org/W4225269853"],"abstract_inverted_index":{"Abstract":[0],"Besides":[1],"the":[2,15,40,43,47,72,87,90,99,102,108,112,128,137,145,173,183,191,208,222,237,255,273],"recent":[3],"impressive":[4],"results":[5],"on":[6,121],"reinforcement":[7],"learning":[8],"(RL),":[9],"safety":[10,54,60,80,91,109,146,192,230],"is":[11,22,56,168,197,211,215,234,282],"still":[12],"one":[13,206],"of":[14,46,82,92,147,152,175,188,193,207,254],"major":[16],"research":[17],"challenges":[18],"in":[19,30,155,182],"RL.":[20],"RL":[21,83],"a":[23,51,68,125,251,264,277],"machine-learning":[24],"approach":[25,77,214],"to":[26,105,132,169,239,285],"determine":[27],"near-optimal":[28],"policies":[29],"Markov":[31],"decision":[32],"processes":[33],"(MDPs).":[34],"In":[35],"this":[36,118,122],"paper,":[37],"we":[38,249],"consider":[39],"setting":[41,275],"where":[42,221],"safety-relevant":[44],"fragment":[45],"MDP":[48],"together":[49],"with":[50],"temporal":[52],"logic":[53],"specification":[55,110],"given,":[57],"and":[58,124,159,199,232,272],"many":[59],"violations":[61],"can":[62,226],"be":[63,180,227],"avoided":[64],"by":[65],"planning":[66,219,266],"ahead":[67,151],"short":[69],"time":[70,223],"into":[71],"future.":[73,185],"We":[74],"propose":[75],"an":[76,134],"for":[78,201,217,229,236],"online":[79,166],"shielding":[81,141,167,202],"agents.":[84],"During":[85],"runtime,":[86],"shield":[88,100,129],"analyses":[89],"each":[93,187],"available":[94,195],"action.":[95,119],"For":[96,186,246],"any":[97],"action,":[98],"computes":[101],"maximal":[103],"probability":[104,123],"not":[106],"violate":[107],"within":[111],"next":[113],"k":[114],"steps":[115],"when":[116],"executing":[117],"Based":[120],"given":[126],"threshold,":[127],"decides":[130],"whether":[131],"block":[133],"action":[135],"from":[136],"agent.":[138],"Existing":[139],"offline":[140],"approaches":[142],"compute":[143,170],"exhaustively":[144],"all":[148,176,194],"state-action":[149],"combinations":[150],"time,":[153],"resulting":[154],"huge":[156],"computation":[157],"times":[158],"large":[160,278],"memory":[161],"consumption.":[162],"The":[163,261],"intuition":[164],"behind":[165],"at":[171],"runtime":[172],"set":[174],"states":[177,210],"that":[178,268],"could":[179],"reached":[181],"near":[184],"these":[189,242],"states,":[190],"actions":[196],"analysed":[198],"used":[200,228],"as":[203,205],"soon":[204],"considered":[209],"reached.":[212],"Our":[213],"well-suited":[216],"high-level":[218,265],"problems":[220],"between":[224],"decisions":[225,271],"computations":[231,243],"it":[233],"sustainable":[235],"agent":[238],"wait":[240],"until":[241],"are":[244],"finished.":[245],"our":[247],"evaluation,":[248],"selected":[250],"2-player":[252],"version":[253],"classical":[256],"computer":[257],"game":[258,262],"Snake":[259],".":[260],"represents":[263],"problem":[267],"requires":[269],"fast":[270],"multiplayer":[274],"induces":[276],"state":[279],"space,":[280],"which":[281],"computationally":[283],"expensive":[284],"analyse":[286],"exhaustively.":[287]},"counts_by_year":[{"year":2026,"cited_by_count":6},{"year":2025,"cited_by_count":9},{"year":2024,"cited_by_count":7},{"year":2023,"cited_by_count":3}],"updated_date":"2026-06-10T14:10:52.464848","created_date":"2025-10-10T00:00:00"}
