{"id":"https://openalex.org/W4400995688","doi":"https://doi.org/10.1007/978-3-031-65633-0_11","title":"Safe Exploration in\u00a0Reinforcement Learning by\u00a0Reachability Analysis over\u00a0Learned Models","display_name":"Safe Exploration in\u00a0Reinforcement Learning by\u00a0Reachability Analysis over\u00a0Learned Models","publication_year":2024,"publication_date":"2024-01-01","ids":{"openalex":"https://openalex.org/W4400995688","doi":"https://doi.org/10.1007/978-3-031-65633-0_11"},"language":"en","primary_location":{"id":"doi:10.1007/978-3-031-65633-0_11","is_oa":true,"landing_page_url":"https://doi.org/10.1007/978-3-031-65633-0_11","pdf_url":"https://link.springer.com/content/pdf/10.1007/978-3-031-65633-0_11.pdf","source":{"id":"https://openalex.org/S106296714","display_name":"Lecture notes in computer science","issn_l":"0302-9743","issn":["0302-9743","1611-3349"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319900","host_organization_name":"Springer Science+Business Media","host_organization_lineage":["https://openalex.org/P4310319900","https://openalex.org/P4310319965"],"host_organization_lineage_names":["Springer Science+Business Media","Springer Nature"],"type":"book series"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Lecture Notes in Computer Science","raw_type":"book-chapter"},"type":"book-chapter","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"hybrid","oa_url":"https://link.springer.com/content/pdf/10.1007/978-3-031-65633-0_11.pdf","any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5066740799","display_name":"Yuning Wang","orcid":"https://orcid.org/0009-0000-4317-9758"},"institutions":[{"id":"https://openalex.org/I102322142","display_name":"Rutgers, The State University of New Jersey","ror":"https://ror.org/05vt9qd57","country_code":"US","type":"education","lineage":["https://openalex.org/I102322142"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Yuning Wang","raw_affiliation_strings":["Rutgers University, New Brunswick, NJ, USA"],"raw_orcid":"https://orcid.org/0009-0000-4317-9758","affiliations":[{"raw_affiliation_string":"Rutgers University, New Brunswick, NJ, USA","institution_ids":["https://openalex.org/I102322142"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5004145814","display_name":"He Zhu","orcid":"https://orcid.org/0000-0001-9606-150X"},"institutions":[{"id":"https://openalex.org/I102322142","display_name":"Rutgers, The State University of New Jersey","ror":"https://ror.org/05vt9qd57","country_code":"US","type":"education","lineage":["https://openalex.org/I102322142"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"He Zhu","raw_affiliation_strings":["Rutgers University, New Brunswick, NJ, USA"],"raw_orcid":"https://orcid.org/0000-0001-9606-150X","affiliations":[{"raw_affiliation_string":"Rutgers University, New Brunswick, NJ, USA","institution_ids":["https://openalex.org/I102322142"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5066740799"],"corresponding_institution_ids":["https://openalex.org/I102322142"],"apc_list":{"value":5000,"currency":"EUR","value_usd":5392},"apc_paid":{"value":5000,"currency":"EUR","value_usd":5392},"fwci":0.749,"has_fulltext":true,"cited_by_count":1,"citation_normalized_percentile":{"value":0.69835568,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":91,"max":95},"biblio":{"volume":null,"issue":null,"first_page":"232","last_page":"255"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9993000030517578,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9993000030517578,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11689","display_name":"Adversarial Robustness in Machine Learning","score":0.996999979019165,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11099","display_name":"Autonomous Vehicle Technology and Safety","score":0.9872999787330627,"subfield":{"id":"https://openalex.org/subfields/2203","display_name":"Automotive Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/reachability","display_name":"Reachability","score":0.9022719860076904},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8500099778175354},{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.8446468114852905},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.47843268513679504},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.35146909952163696},{"id":"https://openalex.org/keywords/theoretical-computer-science","display_name":"Theoretical computer science","score":0.2579609155654907}],"concepts":[{"id":"https://openalex.org/C136643341","wikidata":"https://www.wikidata.org/wiki/Q1361526","display_name":"Reachability","level":2,"score":0.9022719860076904},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8500099778175354},{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.8446468114852905},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.47843268513679504},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.35146909952163696},{"id":"https://openalex.org/C80444323","wikidata":"https://www.wikidata.org/wiki/Q2878974","display_name":"Theoretical computer science","level":1,"score":0.2579609155654907}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1007/978-3-031-65633-0_11","is_oa":true,"landing_page_url":"https://doi.org/10.1007/978-3-031-65633-0_11","pdf_url":"https://link.springer.com/content/pdf/10.1007/978-3-031-65633-0_11.pdf","source":{"id":"https://openalex.org/S106296714","display_name":"Lecture notes in computer science","issn_l":"0302-9743","issn":["0302-9743","1611-3349"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319900","host_organization_name":"Springer Science+Business Media","host_organization_lineage":["https://openalex.org/P4310319900","https://openalex.org/P4310319965"],"host_organization_lineage_names":["Springer Science+Business Media","Springer Nature"],"type":"book series"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Lecture Notes in Computer Science","raw_type":"book-chapter"}],"best_oa_location":{"id":"doi:10.1007/978-3-031-65633-0_11","is_oa":true,"landing_page_url":"https://doi.org/10.1007/978-3-031-65633-0_11","pdf_url":"https://link.springer.com/content/pdf/10.1007/978-3-031-65633-0_11.pdf","source":{"id":"https://openalex.org/S106296714","display_name":"Lecture notes in computer science","issn_l":"0302-9743","issn":["0302-9743","1611-3349"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319900","host_organization_name":"Springer Science+Business Media","host_organization_lineage":["https://openalex.org/P4310319900","https://openalex.org/P4310319965"],"host_organization_lineage_names":["Springer Science+Business Media","Springer Nature"],"type":"book series"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Lecture Notes in Computer Science","raw_type":"book-chapter"},"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G7725450306","display_name":"SHF: Small: Formal Symbolic Reasoning of Deep Reinforcement Learning Systems","funder_award_id":"2007799","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"}],"funders":[{"id":"https://openalex.org/F4320306076","display_name":"National Science Foundation","ror":"https://ror.org/021nxhr62"}],"has_content":{"grobid_xml":false,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4400995688.pdf"},"referenced_works_count":22,"referenced_works":["https://openalex.org/W172589331","https://openalex.org/W1963790880","https://openalex.org/W2053572490","https://openalex.org/W2787908307","https://openalex.org/W2892521964","https://openalex.org/W2912757393","https://openalex.org/W2953466973","https://openalex.org/W2963525569","https://openalex.org/W2963575966","https://openalex.org/W3046457509","https://openalex.org/W3091691175","https://openalex.org/W3100366369","https://openalex.org/W3104303413","https://openalex.org/W3104371626","https://openalex.org/W3124237877","https://openalex.org/W3150718622","https://openalex.org/W3150767555","https://openalex.org/W3152923668","https://openalex.org/W3159199672","https://openalex.org/W3187435263","https://openalex.org/W4384471488","https://openalex.org/W6780559895"],"related_works":["https://openalex.org/W2961085424","https://openalex.org/W4306674287","https://openalex.org/W3046775127","https://openalex.org/W3107602296","https://openalex.org/W4394896187","https://openalex.org/W3170094116","https://openalex.org/W4386462264","https://openalex.org/W4364306694","https://openalex.org/W4312192474","https://openalex.org/W4283697347"],"abstract_inverted_index":{"Abstract":[0],"We":[1],"introduce":[2],"VELM,":[3],"a":[4,71],"reinforcement":[5],"learning":[6,35,116],"(RL)":[7],"framework":[8],"grounded":[9],"in":[10,16,77,111],"verification":[11],"principles":[12],"for":[13,53],"safe":[14,76,115],"exploration":[15,68],"unknown":[17],"environments.":[18],"VELM":[19,37,98],"ensures":[20],"that":[21],"an":[22],"RL":[23,66,89,101,122],"agent":[24],"systematically":[25],"explores":[26],"its":[27,104],"environment,":[28],"adhering":[29],"to":[30,63,106,113],"safety":[31,54,85,109],"properties":[32],"throughout":[33],"the":[34,50,65,78,83,88,95,121],"process.":[36],"learns":[38],"environment":[39],"models":[40,52],"as":[41,75],"symbolic":[42],"formulas":[43],"and":[44],"conducts":[45],"formal":[46],"reachability":[47],"analysis":[48],"over":[49],"learned":[51,79],"verification.":[55],"An":[56],"online":[57],"shielding":[58],"layer":[59],"is":[60],"then":[61],"constructed":[62],"confine":[64],"agent\u2019s":[67,123],"solely":[69],"within":[70],"state":[72],"space":[73],"verified":[74],"model,":[80],"thereby":[81],"bolstering":[82],"overall":[84],"profile":[86],"of":[87,97],"system.":[90],"Our":[91],"experimental":[92],"results":[93],"demonstrate":[94],"efficacy":[96],"across":[99],"diverse":[100],"environments,":[102],"highlighting":[103],"capacity":[105],"significantly":[107],"reduce":[108],"violations":[110],"comparison":[112],"existing":[114],"techniques,":[117],"all":[118],"without":[119],"compromising":[120],"reward":[124],"performance.":[125]},"counts_by_year":[{"year":2025,"cited_by_count":1}],"updated_date":"2026-03-27T05:58:40.876381","created_date":"2025-10-10T00:00:00"}
