{"id":"https://openalex.org/W4414654073","doi":"https://doi.org/10.1145/3770068","title":"Verifying Online Safety Properties for Safe Deep Reinforcement Learning","display_name":"Verifying Online Safety Properties for Safe Deep Reinforcement Learning","publication_year":2025,"publication_date":"2025-09-30","ids":{"openalex":"https://openalex.org/W4414654073","doi":"https://doi.org/10.1145/3770068"},"language":"en","primary_location":{"id":"doi:10.1145/3770068","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3770068","pdf_url":null,"source":{"id":"https://openalex.org/S2492086750","display_name":"ACM Transactions on Intelligent Systems and Technology","issn_l":"2157-6904","issn":["2157-6904","2157-6912"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM Transactions on Intelligent Systems and Technology","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5005034849","display_name":"Luca Marzari","orcid":"https://orcid.org/0000-0002-0069-0182"},"institutions":[{"id":"https://openalex.org/I119439378","display_name":"University of Verona","ror":"https://ror.org/039bp8j42","country_code":"IT","type":"education","lineage":["https://openalex.org/I119439378"]}],"countries":["IT"],"is_corresponding":true,"raw_author_name":"Luca Marzari","raw_affiliation_strings":["Department of Computer Science, University of Verona, Verona, Italy","Department of Computer Science, University of Verona, Italy"],"raw_orcid":"https://orcid.org/0000-0002-0069-0182","affiliations":[{"raw_affiliation_string":"Department of Computer Science, University of Verona, Verona, Italy","institution_ids":["https://openalex.org/I119439378"]},{"raw_affiliation_string":"Department of Computer Science, University of Verona, Italy","institution_ids":["https://openalex.org/I119439378"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5037548562","display_name":"Ferdinando Cicalese","orcid":"https://orcid.org/0000-0003-1652-0599"},"institutions":[{"id":"https://openalex.org/I119439378","display_name":"University of Verona","ror":"https://ror.org/039bp8j42","country_code":"IT","type":"education","lineage":["https://openalex.org/I119439378"]}],"countries":["IT"],"is_corresponding":false,"raw_author_name":"Ferdinando Cicalese","raw_affiliation_strings":["Department of Computer Science, University of Verona, Verona, Italy","Department of Computer Science, University of Verona, Italy"],"raw_orcid":"https://orcid.org/0000-0003-1652-0599","affiliations":[{"raw_affiliation_string":"Department of Computer Science, University of Verona, Verona, Italy","institution_ids":["https://openalex.org/I119439378"]},{"raw_affiliation_string":"Department of Computer Science, University of Verona, Italy","institution_ids":["https://openalex.org/I119439378"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5045922629","display_name":"Alessandro Farinelli","orcid":"https://orcid.org/0000-0002-2592-5814"},"institutions":[{"id":"https://openalex.org/I119439378","display_name":"University of Verona","ror":"https://ror.org/039bp8j42","country_code":"IT","type":"education","lineage":["https://openalex.org/I119439378"]}],"countries":["IT"],"is_corresponding":false,"raw_author_name":"Alessandro Farinelli","raw_affiliation_strings":["Department of Computer Science, University of Verona, Verona, Italy","Department of Computer Science, University of Verona, Italy"],"raw_orcid":"https://orcid.org/0000-0002-2592-5814","affiliations":[{"raw_affiliation_string":"Department of Computer Science, University of Verona, Verona, Italy","institution_ids":["https://openalex.org/I119439378"]},{"raw_affiliation_string":"Department of Computer Science, University of Verona, Italy","institution_ids":["https://openalex.org/I119439378"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5033129735","display_name":"Christopher Amato","orcid":"https://orcid.org/0000-0002-6786-7384"},"institutions":[{"id":"https://openalex.org/I12912129","display_name":"Northeastern University","ror":"https://ror.org/04t5xt781","country_code":"US","type":"education","lineage":["https://openalex.org/I12912129"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Christopher Amato","raw_affiliation_strings":["Khoury College of Computer Sciences, Northeastern University, Boston, Massachusetts, USA","Khoury College of Computer Sciences, Northeastern University, USA"],"raw_orcid":"https://orcid.org/0000-0002-6786-7384","affiliations":[{"raw_affiliation_string":"Khoury College of Computer Sciences, Northeastern University, Boston, Massachusetts, USA","institution_ids":["https://openalex.org/I12912129"]},{"raw_affiliation_string":"Khoury College of Computer Sciences, Northeastern University, USA","institution_ids":["https://openalex.org/I12912129"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5058298540","display_name":"Enrico Marchesini","orcid":"https://orcid.org/0000-0003-1858-7279"},"institutions":[{"id":"https://openalex.org/I4210143601","display_name":"Decision Systems (United States)","ror":"https://ror.org/0434dpa13","country_code":"US","type":"company","lineage":["https://openalex.org/I4210143601"]},{"id":"https://openalex.org/I63966007","display_name":"Massachusetts Institute of Technology","ror":"https://ror.org/042nb2s44","country_code":"US","type":"education","lineage":["https://openalex.org/I63966007"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Enrico Marchesini","raw_affiliation_strings":["Laboratory for Information and Decision Systems, Massachusetts Institute of Technology, Cambridge, Massachusetts, USA","Laboratory for Information &amp; Decision Systems, Massachusetts Institute of Technology, USA"],"raw_orcid":"https://orcid.org/0000-0003-1858-7279","affiliations":[{"raw_affiliation_string":"Laboratory for Information and Decision Systems, Massachusetts Institute of Technology, Cambridge, Massachusetts, USA","institution_ids":["https://openalex.org/I4210143601","https://openalex.org/I63966007"]},{"raw_affiliation_string":"Laboratory for Information &amp; Decision Systems, Massachusetts Institute of Technology, USA","institution_ids":["https://openalex.org/I4210143601"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5005034849"],"corresponding_institution_ids":["https://openalex.org/I119439378"],"apc_list":null,"apc_paid":null,"fwci":2.0951,"has_fulltext":false,"cited_by_count":1,"citation_normalized_percentile":{"value":0.90304828,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":96,"max":98},"biblio":{"volume":"17","issue":"1","first_page":"1","last_page":"27"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11689","display_name":"Adversarial Robustness in Machine Learning","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11689","display_name":"Adversarial Robustness in Machine Learning","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10917","display_name":"Smart Grid Security and Resilience","score":0.9973999857902527,"subfield":{"id":"https://openalex.org/subfields/2207","display_name":"Control and Systems Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11241","display_name":"Advanced Malware Detection Techniques","score":0.9966999888420105,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.838699996471405},{"id":"https://openalex.org/keywords/safer","display_name":"SAFER","score":0.724399983882904},{"id":"https://openalex.org/keywords/probabilistic-logic","display_name":"Probabilistic logic","score":0.6074000000953674},{"id":"https://openalex.org/keywords/state-space","display_name":"State space","score":0.492000013589859},{"id":"https://openalex.org/keywords/key","display_name":"Key (lock)","score":0.4781000018119812},{"id":"https://openalex.org/keywords/convergence","display_name":"Convergence (economics)","score":0.477400004863739},{"id":"https://openalex.org/keywords/state","display_name":"State (computer science)","score":0.38760000467300415},{"id":"https://openalex.org/keywords/computational-complexity-theory","display_name":"Computational complexity theory","score":0.3815999925136566}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8543999791145325},{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.838699996471405},{"id":"https://openalex.org/C2776654903","wikidata":"https://www.wikidata.org/wiki/Q2601463","display_name":"SAFER","level":2,"score":0.724399983882904},{"id":"https://openalex.org/C49937458","wikidata":"https://www.wikidata.org/wiki/Q2599292","display_name":"Probabilistic logic","level":2,"score":0.6074000000953674},{"id":"https://openalex.org/C72434380","wikidata":"https://www.wikidata.org/wiki/Q230930","display_name":"State space","level":2,"score":0.492000013589859},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.4781000018119812},{"id":"https://openalex.org/C2777303404","wikidata":"https://www.wikidata.org/wiki/Q759757","display_name":"Convergence (economics)","level":2,"score":0.477400004863739},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4465999901294708},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.4153999984264374},{"id":"https://openalex.org/C48103436","wikidata":"https://www.wikidata.org/wiki/Q599031","display_name":"State (computer science)","level":2,"score":0.38760000467300415},{"id":"https://openalex.org/C179799912","wikidata":"https://www.wikidata.org/wiki/Q205084","display_name":"Computational complexity theory","level":2,"score":0.3815999925136566},{"id":"https://openalex.org/C2778445095","wikidata":"https://www.wikidata.org/wiki/Q18354077","display_name":"Sample complexity","level":2,"score":0.3653999865055084},{"id":"https://openalex.org/C14036430","wikidata":"https://www.wikidata.org/wiki/Q3736076","display_name":"Function (biology)","level":2,"score":0.349700003862381},{"id":"https://openalex.org/C111498074","wikidata":"https://www.wikidata.org/wiki/Q173326","display_name":"Formal verification","level":2,"score":0.3319999873638153},{"id":"https://openalex.org/C198531522","wikidata":"https://www.wikidata.org/wiki/Q485146","display_name":"Sample (material)","level":2,"score":0.32100000977516174},{"id":"https://openalex.org/C2778572836","wikidata":"https://www.wikidata.org/wiki/Q380933","display_name":"Space (punctuation)","level":2,"score":0.3190000057220459},{"id":"https://openalex.org/C2776291640","wikidata":"https://www.wikidata.org/wiki/Q2912517","display_name":"Value (mathematics)","level":2,"score":0.2870999872684479},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.2800999879837036},{"id":"https://openalex.org/C112930515","wikidata":"https://www.wikidata.org/wiki/Q4389547","display_name":"Risk analysis (engineering)","level":1,"score":0.2743000090122223},{"id":"https://openalex.org/C14646407","wikidata":"https://www.wikidata.org/wiki/Q1430750","display_name":"Bellman equation","level":2,"score":0.2639999985694885},{"id":"https://openalex.org/C200601418","wikidata":"https://www.wikidata.org/wiki/Q2193887","display_name":"Reliability engineering","level":1,"score":0.25920000672340393},{"id":"https://openalex.org/C132835097","wikidata":"https://www.wikidata.org/wiki/Q7663745","display_name":"System safety","level":2,"score":0.2549999952316284},{"id":"https://openalex.org/C108583219","wikidata":"https://www.wikidata.org/wiki/Q197536","display_name":"Deep learning","level":2,"score":0.2531999945640564},{"id":"https://openalex.org/C2984842247","wikidata":"https://www.wikidata.org/wiki/Q197536","display_name":"Deep neural networks","level":3,"score":0.2524999976158142},{"id":"https://openalex.org/C75606506","wikidata":"https://www.wikidata.org/wiki/Q1049183","display_name":"Formal methods","level":2,"score":0.2508000135421753}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1145/3770068","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3770068","pdf_url":null,"source":{"id":"https://openalex.org/S2492086750","display_name":"ACM Transactions on Intelligent Systems and Technology","issn_l":"2157-6904","issn":["2157-6904","2157-6912"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM Transactions on Intelligent Systems and Technology","raw_type":"journal-article"},{"id":"pmh:oai:dspace.mit.edu:1721.1/162893","is_oa":false,"landing_page_url":"https://hdl.handle.net/1721.1/162893","pdf_url":null,"source":{"id":"https://openalex.org/S4306400425","display_name":"DSpace@MIT (Massachusetts Institute of Technology)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I63966007","host_organization_name":"Massachusetts Institute of Technology","host_organization_lineage":["https://openalex.org/I63966007"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Association for Computing Machinery","raw_type":"http://purl.org/eprint/type/JournalArticle"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G246041870","display_name":null,"funder_award_id":"W911NF20-1-0265","funder_id":"https://openalex.org/F4320338281","funder_display_name":"Army Research Office"}],"funders":[{"id":"https://openalex.org/F4320338281","display_name":"Army Research Office","ror":"https://ror.org/05epdh915"}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":24,"referenced_works":["https://openalex.org/W398859631","https://openalex.org/W2296514843","https://openalex.org/W2525579820","https://openalex.org/W2539402368","https://openalex.org/W2594877703","https://openalex.org/W2913756371","https://openalex.org/W2963019567","https://openalex.org/W2963428623","https://openalex.org/W2998619042","https://openalex.org/W3089695338","https://openalex.org/W3164005523","https://openalex.org/W4200103418","https://openalex.org/W4200342431","https://openalex.org/W4206497039","https://openalex.org/W4226429284","https://openalex.org/W4285102394","https://openalex.org/W4285600796","https://openalex.org/W4285805590","https://openalex.org/W4286609345","https://openalex.org/W4308244718","https://openalex.org/W4381571085","https://openalex.org/W4382318502","https://openalex.org/W4412514823","https://openalex.org/W4413017482"],"related_works":[],"abstract_inverted_index":{"Ensuring":[0],"safety":[1,44,64,120,196],"in":[2,11,31,46,110],"reinforcement":[3],"learning":[4],"(RL)":[5],"is":[6,65,146],"critical":[7],"for":[8,72],"deploying":[9],"agents":[10],"real-world":[12,186],"applications.":[13],"During":[14],"training,":[15,63],"current":[16],"safe":[17,97],"RL":[18,98],"approaches":[19,193],"often":[20],"rely":[21],"on":[22,52,101,182],"indicator":[23],"cost":[24,90],"functions":[25,91],"that":[26,125,191],"provide":[27],"sparse":[28],"feedback,":[29],"resulting":[30],"two":[32],"key":[33],"limitations:":[34],"(i)":[35],"poor":[36],"sample":[37],"efficiency":[38],"due":[39],"to":[40,56,128,149,155,175,210],"the":[41,86,114,133,136,151,161,177,199],"lack":[42],"of":[43,88,113,135,164,201],"information":[45],"neighboring":[47],"states,":[48],"and":[49,59,131,185],"(ii)":[50],"dependence":[51],"cost-value":[53],"functions,":[54],"leading":[55],"brittle":[57],"convergence":[58],"suboptimal":[60],"performance.":[61],"After":[62],"guaranteed":[66],"via":[67,92],"formal":[68],"verification":[69,93],"(FV)":[70],"methods":[71],"deep":[73],"neural":[74],"networks,":[75],"whose":[76],"computational":[77],"complexity":[78],"hinders":[79],"their":[80],"application":[81],"during":[82,153],"training.":[83],"We":[84],"address":[85],"limitations":[87],"using":[89],"by":[94,197],"proposing":[95],"a":[96,102,111],"method":[99],"based":[100],"violation":[103,144,178],"value\u2014the":[104],"risk":[105],"associated":[106],"with":[107,172],"policy":[108,158],"decisions":[109],"portion":[112],"state":[115,137],"space.":[116],"Our":[117],"approach":[118],"verifies":[119],"properties":[121,140],"(i.e.,":[122],"state-action":[123],"pairs)":[124],"may":[126],"lead":[127],"unsafe":[129,202],"behavior,":[130],"quantifies":[132],"size":[134],"space":[138],"where":[139],"are":[141],"violated.":[142],"This":[143],"value":[145],"then":[147],"used":[148],"penalize":[150],"agent":[152],"training":[154],"encourage":[156],"safer":[157],"behavior.":[159],"Given":[160],"NP-hard":[162],"nature":[163],"FV,":[165],"we":[166],"propose":[167],"an":[168],"efficient,":[169],"sample-based":[170],"approximation":[171],"probabilistic":[173],"guarantees":[174],"compute":[176],"value.":[179],"Extensive":[180],"experiments":[181],"standard":[183],"benchmarks":[184],"robotic":[187],"navigation":[188],"tasks":[189],"show":[190],"violation-augmented":[192],"significantly":[194],"improve":[195],"reducing":[198],"number":[200],"states":[203],"encountered":[204],"while":[205],"achieving":[206],"superior":[207],"performance":[208],"compared":[209],"existing":[211],"methods.":[212]},"counts_by_year":[{"year":2026,"cited_by_count":1}],"updated_date":"2026-05-05T08:41:31.759640","created_date":"2025-10-10T00:00:00"}
