{"id":"https://openalex.org/W2586823359","doi":"https://doi.org/10.1109/tnnls.2017.2654539","title":"Safe Exploration Algorithms for Reinforcement Learning Controllers","display_name":"Safe Exploration Algorithms for Reinforcement Learning Controllers","publication_year":2017,"publication_date":"2017-02-07","ids":{"openalex":"https://openalex.org/W2586823359","doi":"https://doi.org/10.1109/tnnls.2017.2654539","mag":"2586823359","pmid":"https://pubmed.ncbi.nlm.nih.gov/28182560"},"language":"en","primary_location":{"id":"doi:10.1109/tnnls.2017.2654539","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tnnls.2017.2654539","pdf_url":null,"source":{"id":"https://openalex.org/S4210175523","display_name":"IEEE Transactions on Neural Networks and Learning Systems","issn_l":"2162-237X","issn":["2162-237X","2162-2388"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Neural Networks and Learning Systems","raw_type":"journal-article"},"type":"article","indexed_in":["crossref","pubmed"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5082168106","display_name":"Tommaso Mannucci","orcid":"https://orcid.org/0000-0003-1994-2965"},"institutions":[{"id":"https://openalex.org/I98358874","display_name":"Delft University of Technology","ror":"https://ror.org/02e2c7k09","country_code":"NL","type":"education","lineage":["https://openalex.org/I98358874"]}],"countries":["NL"],"is_corresponding":true,"raw_author_name":"Tommaso Mannucci","raw_affiliation_strings":["Control and Simulation Division, Faculty of Aerospace Engineering, Delft University of Technology, Delft, The Netherlands"],"raw_orcid":"https://orcid.org/0000-0003-1994-2965","affiliations":[{"raw_affiliation_string":"Control and Simulation Division, Faculty of Aerospace Engineering, Delft University of Technology, Delft, The Netherlands","institution_ids":["https://openalex.org/I98358874"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5084079105","display_name":"Erik-Jan Van Kampen","orcid":"https://orcid.org/0000-0002-5593-4471"},"institutions":[{"id":"https://openalex.org/I98358874","display_name":"Delft University of Technology","ror":"https://ror.org/02e2c7k09","country_code":"NL","type":"education","lineage":["https://openalex.org/I98358874"]}],"countries":["NL"],"is_corresponding":false,"raw_author_name":"Erik-Jan van Kampen","raw_affiliation_strings":["Control and Simulation Division, Faculty of Aerospace Engineering, Delft University of Technology, Delft, The Netherlands"],"raw_orcid":"https://orcid.org/0000-0002-5593-4471","affiliations":[{"raw_affiliation_string":"Control and Simulation Division, Faculty of Aerospace Engineering, Delft University of Technology, Delft, The Netherlands","institution_ids":["https://openalex.org/I98358874"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Cornelis de Visser","orcid":null},"institutions":[{"id":"https://openalex.org/I98358874","display_name":"Delft University of Technology","ror":"https://ror.org/02e2c7k09","country_code":"NL","type":"education","lineage":["https://openalex.org/I98358874"]}],"countries":["NL"],"is_corresponding":false,"raw_author_name":"Cornelis de Visser","raw_affiliation_strings":["Control and Simulation Division, Faculty of Aerospace Engineering, Delft University of Technology, Delft, The Netherlands"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Control and Simulation Division, Faculty of Aerospace Engineering, Delft University of Technology, Delft, The Netherlands","institution_ids":["https://openalex.org/I98358874"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5111434787","display_name":"Qiping Chu","orcid":null},"institutions":[{"id":"https://openalex.org/I98358874","display_name":"Delft University of Technology","ror":"https://ror.org/02e2c7k09","country_code":"NL","type":"education","lineage":["https://openalex.org/I98358874"]}],"countries":["NL"],"is_corresponding":false,"raw_author_name":"Qiping Chu","raw_affiliation_strings":["Control and Simulation Division, Faculty of Aerospace Engineering, Delft University of Technology, Delft, The Netherlands"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Control and Simulation Division, Faculty of Aerospace Engineering, Delft University of Technology, Delft, The Netherlands","institution_ids":["https://openalex.org/I98358874"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5082168106"],"corresponding_institution_ids":["https://openalex.org/I98358874"],"apc_list":null,"apc_paid":null,"fwci":7.0755,"has_fulltext":false,"cited_by_count":110,"citation_normalized_percentile":{"value":0.97476016,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":90,"max":100},"biblio":{"volume":"29","issue":"4","first_page":"1069","last_page":"1081"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9932000041007996,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9932000041007996,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10876","display_name":"Fault Detection and Control Systems","score":0.9902999997138977,"subfield":{"id":"https://openalex.org/subfields/2207","display_name":"Control and Systems Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11689","display_name":"Adversarial Robustness in Machine Learning","score":0.9866999983787537,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.7721359133720398},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.6728724241256714},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6591604948043823},{"id":"https://openalex.org/keywords/control","display_name":"Control (management)","score":0.48345717787742615},{"id":"https://openalex.org/keywords/perception","display_name":"Perception","score":0.44454899430274963},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.38089120388031006},{"id":"https://openalex.org/keywords/engineering","display_name":"Engineering","score":0.16474217176437378},{"id":"https://openalex.org/keywords/systems-engineering","display_name":"Systems engineering","score":0.13049182295799255}],"concepts":[{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.7721359133720398},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.6728724241256714},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6591604948043823},{"id":"https://openalex.org/C2775924081","wikidata":"https://www.wikidata.org/wiki/Q55608371","display_name":"Control (management)","level":2,"score":0.48345717787742615},{"id":"https://openalex.org/C26760741","wikidata":"https://www.wikidata.org/wiki/Q160402","display_name":"Perception","level":2,"score":0.44454899430274963},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.38089120388031006},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.16474217176437378},{"id":"https://openalex.org/C201995342","wikidata":"https://www.wikidata.org/wiki/Q682496","display_name":"Systems engineering","level":1,"score":0.13049182295799255},{"id":"https://openalex.org/C169760540","wikidata":"https://www.wikidata.org/wiki/Q207011","display_name":"Neuroscience","level":1,"score":0.0},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0}],"mesh":[],"locations_count":3,"locations":[{"id":"doi:10.1109/tnnls.2017.2654539","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tnnls.2017.2654539","pdf_url":null,"source":{"id":"https://openalex.org/S4210175523","display_name":"IEEE Transactions on Neural Networks and Learning Systems","issn_l":"2162-237X","issn":["2162-237X","2162-2388"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Neural Networks and Learning Systems","raw_type":"journal-article"},{"id":"pmid:28182560","is_oa":false,"landing_page_url":"https://pubmed.ncbi.nlm.nih.gov/28182560","pdf_url":null,"source":{"id":"https://openalex.org/S4306525036","display_name":"PubMed","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1299303238","host_organization_name":"National Institutes of Health","host_organization_lineage":["https://openalex.org/I1299303238"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE transactions on neural networks and learning systems","raw_type":null},{"id":"pmh:oai:tudelft.nl:uuid:a38766ef-e74c-4c91-81c7-a2a13b6ec1a8","is_oa":false,"landing_page_url":"http://resolver.tudelft.nl/uuid:a38766ef-e74c-4c91-81c7-a2a13b6ec1a8","pdf_url":null,"source":{"id":"https://openalex.org/S4306400906","display_name":"Research Repository (Delft University of Technology)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I98358874","host_organization_name":"Delft University of Technology","host_organization_lineage":["https://openalex.org/I98358874"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"journal article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":56,"referenced_works":["https://openalex.org/W15411808","https://openalex.org/W32403112","https://openalex.org/W134786152","https://openalex.org/W1487691575","https://openalex.org/W1515851193","https://openalex.org/W1557760016","https://openalex.org/W1570911433","https://openalex.org/W1585575029","https://openalex.org/W1845972764","https://openalex.org/W1931027396","https://openalex.org/W1972149633","https://openalex.org/W1979743043","https://openalex.org/W1986014385","https://openalex.org/W2012279256","https://openalex.org/W2012380850","https://openalex.org/W2035003264","https://openalex.org/W2052688942","https://openalex.org/W2060248504","https://openalex.org/W2062373349","https://openalex.org/W2073384958","https://openalex.org/W2089684487","https://openalex.org/W2089904313","https://openalex.org/W2098600836","https://openalex.org/W2101075098","https://openalex.org/W2103120971","https://openalex.org/W2106632120","https://openalex.org/W2107726111","https://openalex.org/W2114329019","https://openalex.org/W2119559506","https://openalex.org/W2121831660","https://openalex.org/W2128273577","https://openalex.org/W2133101712","https://openalex.org/W2133632477","https://openalex.org/W2151237105","https://openalex.org/W2151489480","https://openalex.org/W2156734147","https://openalex.org/W2165501837","https://openalex.org/W2169206416","https://openalex.org/W2169619645","https://openalex.org/W2208391220","https://openalex.org/W2288565641","https://openalex.org/W2331522707","https://openalex.org/W2489526136","https://openalex.org/W3106238320","https://openalex.org/W3125893104","https://openalex.org/W4205326910","https://openalex.org/W4211221179","https://openalex.org/W4288079541","https://openalex.org/W4297780563","https://openalex.org/W6600644339","https://openalex.org/W6629287044","https://openalex.org/W6633217410","https://openalex.org/W6640490175","https://openalex.org/W6682367392","https://openalex.org/W6683000379","https://openalex.org/W6685043521"],"related_works":["https://openalex.org/W4362501864","https://openalex.org/W4306904969","https://openalex.org/W4380318855","https://openalex.org/W2138720691","https://openalex.org/W2031695474","https://openalex.org/W2586732548","https://openalex.org/W3049728571","https://openalex.org/W20361778","https://openalex.org/W2598946408","https://openalex.org/W2169395074"],"abstract_inverted_index":{"Self-learning":[0],"approaches,":[1],"such":[2],"as":[3],"reinforcement":[4],"learning,":[5],"offer":[6],"new":[7,67],"possibilities":[8],"for":[9,29,154,172],"autonomous":[10],"control":[11,194],"of":[12,59,65,79,83,91,94,105,112,181,186],"uncertain":[13],"or":[14,45,82],"time-varying":[15],"systems.":[16],"However,":[17],"exploring":[18],"an":[19,47,191],"unknown":[20],"environment":[21,35],"under":[22],"limited":[23,103],"prediction":[24],"capabilities":[25],"is":[26,36,62,125,146,175,188],"a":[27,30,66,102,149],"challenge":[28],"learning":[31],"agent.":[32],"If":[33],"the":[34,56,63,80,84,92,95,98,106,110,126,179],"dangerous,":[37],"free":[38],"exploration":[39,99],"can":[40,165],"result":[41],"in":[42,46,148],"physical":[43],"damage":[44],"otherwise":[48],"unacceptable":[49],"behavior.":[50],"With":[51],"respect":[52],"to":[53,108],"existing":[54],"methods,":[55],"main":[57],"contribution":[58],"this":[60,121],"paper":[61],"definition":[64],"approach":[68],"that":[69],"does":[70],"not":[71,176],"require":[72],"global":[73],"safety":[74,137,141,182],"functions,":[75,142],"nor":[76],"specific":[77],"formulations":[78],"dynamics":[81,93],"environment,":[85],"but":[86],"relies":[87],"on":[88,190],"interval":[89],"estimation":[90],"agent":[96,107],"during":[97],"phase,":[100],"assuming":[101],"capability":[104],"perceive":[109],"presence":[111],"incoming":[113],"fatal":[114],"states.":[115],"Two":[116],"algorithms":[117],"are":[118,158],"presented":[119],"with":[120,130],"approach.":[122],"The":[123,160],"first":[124],"Safety":[127],"Handling":[128],"Exploration":[129],"Risk":[131],"Perception":[132],"Algorithm":[133],"(SHERPA),":[134],"which":[135,155,173],"provides":[136],"by":[138],"individuating":[139],"temporary":[140],"called":[143],"backups.":[144],"SHERPA":[145,174],"shown":[147],"simulated,":[150],"simplified":[151],"quadrotor":[152],"task,":[153],"dangerous":[156],"states":[157],"avoided.":[159],"second":[161],"algorithm,":[162],"denominated":[163],"OptiSHERPA,":[164],"safely":[166],"handle":[167],"more":[168],"dynamically":[169],"complex":[170],"systems":[171],"sufficient":[177],"through":[178],"use":[180],"metrics.":[183],"An":[184],"application":[185],"OptiSHERPA":[187],"simulated":[189],"aircraft":[192],"altitude":[193],"task.":[195]},"counts_by_year":[{"year":2026,"cited_by_count":4},{"year":2025,"cited_by_count":8},{"year":2024,"cited_by_count":10},{"year":2023,"cited_by_count":18},{"year":2022,"cited_by_count":14},{"year":2021,"cited_by_count":21},{"year":2020,"cited_by_count":14},{"year":2019,"cited_by_count":13},{"year":2018,"cited_by_count":7},{"year":2016,"cited_by_count":1}],"updated_date":"2026-05-28T09:10:13.091523","created_date":"2025-10-10T00:00:00"}
