{"id":"https://openalex.org/W3039045514","doi":"https://doi.org/10.1145/3447928.3456653","title":"Verifiably safe exploration for end-to-end reinforcement learning","display_name":"Verifiably safe exploration for end-to-end reinforcement learning","publication_year":2021,"publication_date":"2021-05-04","ids":{"openalex":"https://openalex.org/W3039045514","doi":"https://doi.org/10.1145/3447928.3456653","mag":"3039045514"},"language":"en","primary_location":{"id":"doi:10.1145/3447928.3456653","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3447928.3456653","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3447928.3456653","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 24th International Conference on Hybrid Systems: Computation and Control","raw_type":"proceedings-article"},"type":"preprint","indexed_in":["arxiv","crossref","datacite"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://dl.acm.org/doi/pdf/10.1145/3447928.3456653","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5050376022","display_name":"Nathan Hunt","orcid":null},"institutions":[{"id":"https://openalex.org/I63966007","display_name":"Massachusetts Institute of Technology","ror":"https://ror.org/042nb2s44","country_code":"US","type":"education","lineage":["https://openalex.org/I63966007"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Nathan Hunt","raw_affiliation_strings":["Massachusetts Institute of Technology"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Massachusetts Institute of Technology","institution_ids":["https://openalex.org/I63966007"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5069329835","display_name":"Nathan Fulton","orcid":"https://orcid.org/0000-0002-4172-7631"},"institutions":[{"id":"https://openalex.org/I1341412227","display_name":"IBM (United States)","ror":"https://ror.org/05hh8d621","country_code":"US","type":"company","lineage":["https://openalex.org/I1341412227"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Nathan Fulton","raw_affiliation_strings":["MIT-IBM Watson AI Lab"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"MIT-IBM Watson AI Lab","institution_ids":["https://openalex.org/I1341412227"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5073301822","display_name":"Sara Magliacane","orcid":"https://orcid.org/0000-0002-1360-2639"},"institutions":[{"id":"https://openalex.org/I887064364","display_name":"University of Amsterdam","ror":"https://ror.org/04dkp9463","country_code":"NL","type":"education","lineage":["https://openalex.org/I887064364"]}],"countries":["NL"],"is_corresponding":false,"raw_author_name":"Sara Magliacane","raw_affiliation_strings":["University of Amsterdam"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"University of Amsterdam","institution_ids":["https://openalex.org/I887064364"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5102929916","display_name":"Trong Nghia Hoang","orcid":"https://orcid.org/0000-0002-9175-6246"},"institutions":[{"id":"https://openalex.org/I1341412227","display_name":"IBM (United States)","ror":"https://ror.org/05hh8d621","country_code":"US","type":"company","lineage":["https://openalex.org/I1341412227"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Trong Nghia Hoang","raw_affiliation_strings":["MIT-IBM Watson AI Lab"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"MIT-IBM Watson AI Lab","institution_ids":["https://openalex.org/I1341412227"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5011183160","display_name":"Subhro Das","orcid":"https://orcid.org/0000-0002-7610-2738"},"institutions":[{"id":"https://openalex.org/I1341412227","display_name":"IBM (United States)","ror":"https://ror.org/05hh8d621","country_code":"US","type":"company","lineage":["https://openalex.org/I1341412227"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Subhro Das","raw_affiliation_strings":["MIT-IBM Watson AI Lab"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"MIT-IBM Watson AI Lab","institution_ids":["https://openalex.org/I1341412227"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5010786661","display_name":"Armando Solar-Lezama","orcid":"https://orcid.org/0000-0001-7604-8252"},"institutions":[{"id":"https://openalex.org/I63966007","display_name":"Massachusetts Institute of Technology","ror":"https://ror.org/042nb2s44","country_code":"US","type":"education","lineage":["https://openalex.org/I63966007"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Armando Solar-Lezama","raw_affiliation_strings":["Massachusetts Institute of Technology"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Massachusetts Institute of Technology","institution_ids":["https://openalex.org/I63966007"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":6,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.8396,"has_fulltext":true,"cited_by_count":7,"citation_normalized_percentile":{"value":0.77616726,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":97},"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"11"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9990000128746033,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9990000128746033,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11689","display_name":"Adversarial Robustness in Machine Learning","score":0.9984999895095825,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11099","display_name":"Autonomous Vehicle Technology and Safety","score":0.9916999936103821,"subfield":{"id":"https://openalex.org/subfields/2203","display_name":"Automotive Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/benchmark","display_name":"Benchmark (surveying)","score":0.8560570478439331},{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.827376127243042},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7038735747337341},{"id":"https://openalex.org/keywords/end-to-end-principle","display_name":"End-to-end principle","score":0.498737096786499},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.4394722282886505},{"id":"https://openalex.org/keywords/risk-analysis","display_name":"Risk analysis (engineering)","score":0.34394553303718567},{"id":"https://openalex.org/keywords/business","display_name":"Business","score":0.08339276909828186}],"concepts":[{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.8560570478439331},{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.827376127243042},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7038735747337341},{"id":"https://openalex.org/C74296488","wikidata":"https://www.wikidata.org/wiki/Q2527392","display_name":"End-to-end principle","level":2,"score":0.498737096786499},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4394722282886505},{"id":"https://openalex.org/C112930515","wikidata":"https://www.wikidata.org/wiki/Q4389547","display_name":"Risk analysis (engineering)","level":1,"score":0.34394553303718567},{"id":"https://openalex.org/C144133560","wikidata":"https://www.wikidata.org/wiki/Q4830453","display_name":"Business","level":0,"score":0.08339276909828186},{"id":"https://openalex.org/C13280743","wikidata":"https://www.wikidata.org/wiki/Q131089","display_name":"Geodesy","level":1,"score":0.0},{"id":"https://openalex.org/C205649164","wikidata":"https://www.wikidata.org/wiki/Q1071","display_name":"Geography","level":0,"score":0.0}],"mesh":[],"locations_count":8,"locations":[{"id":"doi:10.1145/3447928.3456653","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3447928.3456653","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3447928.3456653","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 24th International Conference on Hybrid Systems: Computation and Control","raw_type":"proceedings-article"},{"id":"pmh:oai:arXiv.org:2007.01223","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2007.01223","pdf_url":"https://arxiv.org/pdf/2007.01223","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"text"},{"id":"pmh:oai:dare.uva.nl:openaire/e4605542-c419-4304-bd2a-0d4e4dc12dbb","is_oa":true,"landing_page_url":"https://handle.uba.uva.nl/personal/pure/en/publications/verifiably-safe-exploration-for-endtoend-reinforcement-learning(e4605542-c419-4304-bd2a-0d4e4dc12dbb).html","pdf_url":"https://pure.uva.nl/ws/files/70400436/3447928.3456653.pdf","source":{"id":"https://openalex.org/S4306400088","display_name":"UvA-DARE (University of Amsterdam)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I887064364","host_organization_name":"University of Amsterdam","host_organization_lineage":["https://openalex.org/I887064364"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Hunt, N, Fulton, N, Magliacane, S, Hoang, T N, Das, S & Solar-Lezama, A 2021, Verifiably Safe Exploration for End-to-End Reinforcement Learning. in HSCC2021 : proceedings of the 24th International Conference on Hybrid Systems: Computation and Control (part of CPS-IoT Week) : May 19-21, 2021, Nashville, TN, USA., 14, New York, New York, 24th International Conference on Hybrid Systems: Computation and Control, Nashville, Tennessee, United States, 19/05/21. https://doi.org/10.1145/3447928.3456653","raw_type":"info:eu-repo/semantics/publishedVersion"},{"id":"mag:3039045514","is_oa":true,"landing_page_url":"http://export.arxiv.org/pdf/2007.01223","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"arXiv (Cornell University)","raw_type":null},{"id":"pmh:oai:dare.uva.nl:publications/e4605542-c419-4304-bd2a-0d4e4dc12dbb","is_oa":true,"landing_page_url":"http://hdl.handle.net/11245.1/e4605542-c419-4304-bd2a-0d4e4dc12dbb","pdf_url":null,"source":{"id":"https://openalex.org/S4306400088","display_name":"UvA-DARE (University of Amsterdam)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I887064364","host_organization_name":"University of Amsterdam","host_organization_lineage":["https://openalex.org/I887064364"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Hunt, N, Fulton, N, Magliacane, S, Hoang, T N, Das, S & Solar-Lezama, A 2021, Verifiably Safe Exploration for End-to-End Reinforcement Learning. in HSCC2021 : proceedings of the 24th International Conference on Hybrid Systems: Computation and Control (part of CPS-IoT Week) : May 19-21, 2021, Nashville, TN, USA., 14, New York, New York, 24th International Conference on Hybrid Systems: Computation and Control, Nashville, Tennessee, United States, 19/05/21. https://doi.org/10.1145/3447928.3456653","raw_type":"info:eu-repo/semantics/publishedVersion"},{"id":"pmh:oai:dspace.mit.edu:1721.1/143887","is_oa":true,"landing_page_url":"https://hdl.handle.net/1721.1/143887","pdf_url":null,"source":{"id":"https://openalex.org/S4306400425","display_name":"DSpace@MIT (Massachusetts Institute of Technology)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I63966007","host_organization_name":"Massachusetts Institute of Technology","host_organization_lineage":["https://openalex.org/I63966007"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by-nc","license_id":"https://openalex.org/licenses/cc-by-nc","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"ACM","raw_type":"http://purl.org/eprint/type/JournalArticle"},{"id":"pmh:uvapub:oai:dare.uva.nl:publications/e4605542-c419-4304-bd2a-0d4e4dc12dbb","is_oa":true,"landing_page_url":"https://dare.uva.nl/personal/pure/en/publications/verifiably-safe-exploration-for-endtoend-reinforcement-learning(e4605542-c419-4304-bd2a-0d4e4dc12dbb).html","pdf_url":null,"source":{"id":"https://openalex.org/S4306401843","display_name":"Data Archiving and Networked Services (DANS)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1322597698","host_organization_name":"Royal Netherlands Academy of Arts and Sciences","host_organization_lineage":["https://openalex.org/I1322597698"],"host_organization_lineage_names":[],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"HSCC2021: proceedings of the 24th International Conference on Hybrid Systems: Computation and Control (part of CPS-IoT Week) : May 19-21, 2021, Nashville, TN, USA","raw_type":"info:eu-repo/semantics/conferencepaper"},{"id":"doi:10.48550/arxiv.2007.01223","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2007.01223","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.1145/3447928.3456653","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3447928.3456653","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3447928.3456653","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 24th International Conference on Hybrid Systems: Computation and Control","raw_type":"proceedings-article"},"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/17","display_name":"Partnerships for the goals","score":0.47999998927116394}],"awards":[],"funders":[],"has_content":{"pdf":true,"grobid_xml":true},"content_urls":{"pdf":"https://content.openalex.org/works/W3039045514.pdf","grobid_xml":"https://content.openalex.org/works/W3039045514.grobid-xml"},"referenced_works_count":53,"referenced_works":["https://openalex.org/W19888102","https://openalex.org/W1510918931","https://openalex.org/W1522301498","https://openalex.org/W1590463358","https://openalex.org/W1757796397","https://openalex.org/W1771410628","https://openalex.org/W1845972764","https://openalex.org/W1941921537","https://openalex.org/W1977444293","https://openalex.org/W2049768769","https://openalex.org/W2080884201","https://openalex.org/W2121863487","https://openalex.org/W2127240436","https://openalex.org/W2154128151","https://openalex.org/W2194775991","https://openalex.org/W2296642625","https://openalex.org/W2521274174","https://openalex.org/W2525936901","https://openalex.org/W2618318883","https://openalex.org/W2736601468","https://openalex.org/W2747477340","https://openalex.org/W2784465508","https://openalex.org/W2784931892","https://openalex.org/W2786036274","https://openalex.org/W2786344118","https://openalex.org/W2787908307","https://openalex.org/W2798588334","https://openalex.org/W2798713496","https://openalex.org/W2808531200","https://openalex.org/W2892521964","https://openalex.org/W2893737399","https://openalex.org/W2895120768","https://openalex.org/W2895196950","https://openalex.org/W2899455150","https://openalex.org/W2912640545","https://openalex.org/W2912722426","https://openalex.org/W2912757393","https://openalex.org/W2935837427","https://openalex.org/W2949608212","https://openalex.org/W2962803570","https://openalex.org/W2963351448","https://openalex.org/W2963575966","https://openalex.org/W2964191931","https://openalex.org/W2964514675","https://openalex.org/W2966467576","https://openalex.org/W2966735560","https://openalex.org/W2972093249","https://openalex.org/W2972744417","https://openalex.org/W3012573144","https://openalex.org/W3037396281","https://openalex.org/W3101115050","https://openalex.org/W3104303413","https://openalex.org/W3159199672"],"related_works":["https://openalex.org/W3132737487","https://openalex.org/W3163219368","https://openalex.org/W3014142920","https://openalex.org/W3008689912","https://openalex.org/W2890130117","https://openalex.org/W1975741370","https://openalex.org/W3092270557","https://openalex.org/W2788729564","https://openalex.org/W3096386778","https://openalex.org/W2530849036","https://openalex.org/W2949893390","https://openalex.org/W3205951249","https://openalex.org/W3165631200","https://openalex.org/W2268946161","https://openalex.org/W2296212956","https://openalex.org/W2949760349","https://openalex.org/W3115284188","https://openalex.org/W2894033912","https://openalex.org/W2250058390","https://openalex.org/W2595166142"],"abstract_inverted_index":{"Deploying":[0],"deep":[1],"reinforcement":[2,142],"learning":[3,80,143],"in":[4,39,64,126,164,183,207,215],"safety-critical":[5],"settings":[6],"requires":[7],"developing":[8],"algorithms":[9],"that":[10,57,84,91,146,158],"obey":[11],"hard":[12,68],"constraints":[13,26,125],"during":[14],"exploration.":[15],"This":[16,169],"paper":[17],"contributes":[18],"a":[19,54,129,179,216],"first":[20],"approach":[21,34,50],"toward":[22],"enforcing":[23],"formal":[24],"safety":[25,96,124,191],"on":[27,36,53,132],"end-to-end":[28],"policies":[29],"with":[30,95],"visual":[31],"inputs.":[32],"Our":[33,70],"draws":[35,72],"recent":[37],"advances":[38],"object":[40],"detection":[41],"and":[42,81,192],"automated":[43],"reasoning":[44],"for":[45,78,115],"hybrid":[46],"dynamical":[47],"systems.":[48],"The":[49],"is":[51,120],"evaluated":[52],"novel":[55],"benchmark":[56,71,102],"emphasizes":[58],"the":[59,65,141,155,165,200,208],"challenge":[60],"of":[61,67,100,128,204],"safely":[62,206],"exploring":[63],"presence":[66],"constraints.":[69,97],"from":[73],"several":[74],"proposed":[75],"problem":[76,203],"sets":[77],"safe":[79,150,160],"includes":[82],"problems":[83],"emphasize":[85],"challenges":[86],"such":[87],"as":[88,116,119],"reward":[89,118],"signals":[90],"are":[92,162,186],"not":[93],"aligned":[94],"On":[98],"each":[99],"these":[101],"problems,":[103],"our":[104],"algorithm":[105,144],"completely":[106],"avoids":[107],"unsafe":[108],"behavior":[109],"while":[110],"remaining":[111],"competitive":[112],"at":[113],"optimizing":[114],"much":[117],"safe.":[121],"We":[122],"characterize":[123],"terms":[127],"refinement":[130],"relation":[131],"Markov":[133],"decision":[134],"processes":[135],"-":[136],"rather":[137],"than":[138],"directly":[139],"constraining":[140],"so":[145,157],"it":[147],"only":[148,159],"takes":[149],"actions,":[151],"we":[152,185],"instead":[153],"refine":[154],"environment":[156,210],"actions":[161],"defined":[163],"environment's":[166],"transition":[167],"structure.":[168],"has":[170],"pragmatic":[171],"system":[172],"design":[173],"benefits":[174],"and,":[175],"more":[176],"importantly,":[177],"provides":[178],"clean":[180],"conceptual":[181],"setting":[182],"which":[184],"able":[187],"to":[188,198],"prove":[189],"important":[190],"efficiency":[193],"properties.":[194],"These":[195],"allow":[196],"us":[197],"transform":[199],"constrained":[201],"optimization":[202,214],"acting":[205],"original":[209],"into":[211],"an":[212],"unconstrained":[213],"refined":[217],"environment.":[218]},"counts_by_year":[{"year":2024,"cited_by_count":1},{"year":2022,"cited_by_count":2},{"year":2021,"cited_by_count":3},{"year":2020,"cited_by_count":1}],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2025-10-10T00:00:00"}
