{"id":"https://openalex.org/W2912757393","doi":"https://doi.org/10.1007/978-3-030-17462-0_28","title":"Verifiably Safe Off-Model Reinforcement Learning","display_name":"Verifiably Safe Off-Model Reinforcement Learning","publication_year":2019,"publication_date":"2019-01-01","ids":{"openalex":"https://openalex.org/W2912757393","doi":"https://doi.org/10.1007/978-3-030-17462-0_28","mag":"2912757393"},"language":"en","primary_location":{"id":"doi:10.1007/978-3-030-17462-0_28","is_oa":true,"landing_page_url":"https://doi.org/10.1007/978-3-030-17462-0_28","pdf_url":"https://link.springer.com/content/pdf/10.1007%2F978-3-030-17462-0_28.pdf","source":{"id":"https://openalex.org/S106296714","display_name":"Lecture notes in computer science","issn_l":"0302-9743","issn":["0302-9743","1611-3349"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319900","host_organization_name":"Springer Science+Business Media","host_organization_lineage":["https://openalex.org/P4310319900","https://openalex.org/P4310319965"],"host_organization_lineage_names":["Springer Science+Business Media","Springer Nature"],"type":"book series"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Lecture Notes in Computer Science","raw_type":"book-chapter"},"type":"book-chapter","indexed_in":["arxiv","crossref"],"open_access":{"is_oa":true,"oa_status":"hybrid","oa_url":"https://link.springer.com/content/pdf/10.1007%2F978-3-030-17462-0_28.pdf","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":null,"display_name":"Nathan Fulton","orcid":"https://orcid.org/0000-0002-4172-7631"},"institutions":[{"id":"https://openalex.org/I74973139","display_name":"Carnegie Mellon University","ror":"https://ror.org/05x2bcf33","country_code":"US","type":"education","lineage":["https://openalex.org/I74973139"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Nathan Fulton","raw_affiliation_strings":["Computer Science Department, Carnegie Mellon University, Pittsburgh, USA"],"raw_orcid":"https://orcid.org/0000-0002-4172-7631","affiliations":[{"raw_affiliation_string":"Computer Science Department, Carnegie Mellon University, Pittsburgh, USA","institution_ids":["https://openalex.org/I74973139"]}]},{"author_position":"last","author":{"id":null,"display_name":"Andr\u00e9 Platzer","orcid":"https://orcid.org/0000-0001-7238-5710"},"institutions":[{"id":"https://openalex.org/I74973139","display_name":"Carnegie Mellon University","ror":"https://ror.org/05x2bcf33","country_code":"US","type":"education","lineage":["https://openalex.org/I74973139"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Andr\u00e9 Platzer","raw_affiliation_strings":["Computer Science Department, Carnegie Mellon University, Pittsburgh, USA"],"raw_orcid":"https://orcid.org/0000-0001-7238-5710","affiliations":[{"raw_affiliation_string":"Computer Science Department, Carnegie Mellon University, Pittsburgh, USA","institution_ids":["https://openalex.org/I74973139"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":2,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":{"value":5000,"currency":"EUR","value_usd":5392},"apc_paid":{"value":5000,"currency":"EUR","value_usd":5392},"fwci":10.7671,"has_fulltext":true,"cited_by_count":40,"citation_normalized_percentile":{"value":0.98810039,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":95,"max":99},"biblio":{"volume":null,"issue":null,"first_page":"413","last_page":"430"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10142","display_name":"Formal Methods in Verification","score":0.5063999891281128,"subfield":{"id":"https://openalex.org/subfields/1703","display_name":"Computational Theory and Mathematics"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10142","display_name":"Formal Methods in Verification","score":0.5063999891281128,"subfield":{"id":"https://openalex.org/subfields/1703","display_name":"Computational Theory and Mathematics"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11689","display_name":"Adversarial Robustness in Machine Learning","score":0.27709999680519104,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.07729999721050262,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.8205000162124634},{"id":"https://openalex.org/keywords/mathematical-proof","display_name":"Mathematical proof","score":0.6858000159263611},{"id":"https://openalex.org/keywords/formal-methods","display_name":"Formal methods","score":0.49160000681877136},{"id":"https://openalex.org/keywords/formal-verification","display_name":"Formal verification","score":0.42820000648498535},{"id":"https://openalex.org/keywords/formal-description","display_name":"Formal description","score":0.40849998593330383},{"id":"https://openalex.org/keywords/formal-specification","display_name":"Formal specification","score":0.3716999888420105},{"id":"https://openalex.org/keywords/model-checking","display_name":"Model checking","score":0.3562000095844269}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8740000128746033},{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.8205000162124634},{"id":"https://openalex.org/C108710211","wikidata":"https://www.wikidata.org/wiki/Q11538","display_name":"Mathematical proof","level":2,"score":0.6858000159263611},{"id":"https://openalex.org/C75606506","wikidata":"https://www.wikidata.org/wiki/Q1049183","display_name":"Formal methods","level":2,"score":0.49160000681877136},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.42899999022483826},{"id":"https://openalex.org/C111498074","wikidata":"https://www.wikidata.org/wiki/Q173326","display_name":"Formal verification","level":2,"score":0.42820000648498535},{"id":"https://openalex.org/C2985583900","wikidata":"https://www.wikidata.org/wiki/Q722617","display_name":"Formal description","level":2,"score":0.40849998593330383},{"id":"https://openalex.org/C116253237","wikidata":"https://www.wikidata.org/wiki/Q1437424","display_name":"Formal specification","level":2,"score":0.3716999888420105},{"id":"https://openalex.org/C110251889","wikidata":"https://www.wikidata.org/wiki/Q1569697","display_name":"Model checking","level":2,"score":0.3562000095844269},{"id":"https://openalex.org/C94461902","wikidata":"https://www.wikidata.org/wiki/Q2762418","display_name":"Formal proof","level":3,"score":0.3441999852657318},{"id":"https://openalex.org/C12298181","wikidata":"https://www.wikidata.org/wiki/Q7246814","display_name":"Proactive learning","level":5,"score":0.31349998712539673},{"id":"https://openalex.org/C77967617","wikidata":"https://www.wikidata.org/wiki/Q4677561","display_name":"Active learning (machine learning)","level":2,"score":0.28870001435279846},{"id":"https://openalex.org/C2775924081","wikidata":"https://www.wikidata.org/wiki/Q55608371","display_name":"Control (management)","level":2,"score":0.2646999955177307},{"id":"https://openalex.org/C13687954","wikidata":"https://www.wikidata.org/wiki/Q4826847","display_name":"Autonomous agent","level":2,"score":0.2599000036716461},{"id":"https://openalex.org/C2779382394","wikidata":"https://www.wikidata.org/wiki/Q1464197","display_name":"Inductive logic programming","level":2,"score":0.25540000200271606},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.25380000472068787}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1007/978-3-030-17462-0_28","is_oa":true,"landing_page_url":"https://doi.org/10.1007/978-3-030-17462-0_28","pdf_url":"https://link.springer.com/content/pdf/10.1007%2F978-3-030-17462-0_28.pdf","source":{"id":"https://openalex.org/S106296714","display_name":"Lecture notes in computer science","issn_l":"0302-9743","issn":["0302-9743","1611-3349"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319900","host_organization_name":"Springer Science+Business Media","host_organization_lineage":["https://openalex.org/P4310319900","https://openalex.org/P4310319965"],"host_organization_lineage_names":["Springer Science+Business Media","Springer Nature"],"type":"book series"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Lecture Notes in Computer Science","raw_type":"book-chapter"},{"id":"pmh:oai:arXiv.org:1902.05632","is_oa":true,"landing_page_url":"http://arxiv.org/abs/1902.05632","pdf_url":"https://arxiv.org/pdf/1902.05632","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"}],"best_oa_location":{"id":"doi:10.1007/978-3-030-17462-0_28","is_oa":true,"landing_page_url":"https://doi.org/10.1007/978-3-030-17462-0_28","pdf_url":"https://link.springer.com/content/pdf/10.1007%2F978-3-030-17462-0_28.pdf","source":{"id":"https://openalex.org/S106296714","display_name":"Lecture notes in computer science","issn_l":"0302-9743","issn":["0302-9743","1611-3349"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319900","host_organization_name":"Springer Science+Business Media","host_organization_lineage":["https://openalex.org/P4310319900","https://openalex.org/P4310319965"],"host_organization_lineage_names":["Springer Science+Business Media","Springer Nature"],"type":"book series"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Lecture Notes in Computer Science","raw_type":"book-chapter"},"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G4713059963","display_name":null,"funder_award_id":"FA8750","funder_id":"https://openalex.org/F4320332180","funder_display_name":"Defense Advanced Research Projects Agency"},{"id":"https://openalex.org/G4895191527","display_name":null,"funder_award_id":"FA8750-18-C-0092","funder_id":"https://openalex.org/F4320332180","funder_display_name":"Defense Advanced Research Projects Agency"}],"funders":[{"id":"https://openalex.org/F4320332180","display_name":"Defense Advanced Research Projects Agency","ror":"https://ror.org/02caytj08"},{"id":"https://openalex.org/F4320332815","display_name":"Advanced Research Projects Agency","ror":"https://ror.org/02caytj08"}],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W2912757393.pdf","grobid_xml":"https://content.openalex.org/works/W2912757393.grobid-xml"},"referenced_works_count":23,"referenced_works":["https://openalex.org/W1484196467","https://openalex.org/W1556387789","https://openalex.org/W1590463358","https://openalex.org/W1592847719","https://openalex.org/W1977444293","https://openalex.org/W1993536119","https://openalex.org/W1995691455","https://openalex.org/W2080884201","https://openalex.org/W2127240436","https://openalex.org/W2129965357","https://openalex.org/W2145373440","https://openalex.org/W2222789563","https://openalex.org/W2296642625","https://openalex.org/W2556717355","https://openalex.org/W2747477340","https://openalex.org/W2787908307","https://openalex.org/W2789158833","https://openalex.org/W2796061040","https://openalex.org/W2798302610","https://openalex.org/W2798378744","https://openalex.org/W2914197476","https://openalex.org/W2962683513","https://openalex.org/W2963575966"],"related_works":[],"abstract_inverted_index":{"The":[0],"desire":[1],"to":[2],"use":[3],"reinforcement":[4,100],"learning":[5,18,24,33,50,101],"in":[6,14,81,102,145],"safety-critical":[7],"settings":[8,103],"has":[9],"inspired":[10],"a":[11,38,115,133],"recent":[12],"interest":[13],"formal":[15,21,96,138],"methods":[16,22],"for":[17,23,70,77,99,141],"algorithms.":[19],"Existing":[20],"and":[25,42,120,127],"optimization":[26],"primarily":[27],"consider":[28],"the":[29,56,91],"problem":[30],"of":[31,117],"constrained":[32,35],"or":[34],"optimization.":[36],"Given":[37],"single":[39],"correct":[40],"model":[41,65,89,125,129],"associated":[43],"safety":[44,57,97,139],"constraint,":[45],"these":[46],"approaches":[47],"guarantee":[48],"efficient":[49],"while":[51],"provably":[52],"avoiding":[53],"behaviors":[54],"outside":[55],"constraint.":[58],"Acting":[59],"well":[60],"given":[61],"an":[62,67],"accurate":[63],"environmental":[64,107],"is":[66,74],"important":[68],"pre-requisite":[69],"safe":[71],"learning,":[72],"but":[73],"ultimately":[75],"insufficient":[76],"systems":[78,143],"that":[79],"operate":[80],"complex":[82],"heterogeneous":[83,146],"environments.":[84,147],"This":[85],"paper":[86],"introduces":[87],"verification-preserving":[88],"updates,":[90],"first":[92,134],"approach":[93,135],"toward":[94,136],"obtaining":[95,137],"guarantees":[98],"where":[104],"multiple":[105],"possible":[106],"models":[108],"must":[109],"be":[110],"taken":[111],"into":[112],"account.":[113],"Through":[114],"combination":[116],"inductive":[118],"data":[119],"deductive":[121],"proving":[122],"with":[123],"design-time":[124],"updates":[126],"runtime":[128],"falsification,":[130],"we":[131],"provide":[132],"proofs":[140],"autonomous":[142],"acting":[144]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":5},{"year":2024,"cited_by_count":8},{"year":2023,"cited_by_count":4},{"year":2022,"cited_by_count":6},{"year":2021,"cited_by_count":4},{"year":2020,"cited_by_count":8},{"year":2019,"cited_by_count":4}],"updated_date":"2026-06-12T08:23:45.883708","created_date":"2019-02-21T00:00:00"}
