{"id":"https://openalex.org/W4405055855","doi":"https://doi.org/10.1109/lra.2024.3512374","title":"Safety Filtering While Training: Improving the Performance and Sample Efficiency of Reinforcement Learning Agents","display_name":"Safety Filtering While Training: Improving the Performance and Sample Efficiency of Reinforcement Learning Agents","publication_year":2024,"publication_date":"2024-12-05","ids":{"openalex":"https://openalex.org/W4405055855","doi":"https://doi.org/10.1109/lra.2024.3512374"},"language":"en","primary_location":{"id":"doi:10.1109/lra.2024.3512374","is_oa":false,"landing_page_url":"https://doi.org/10.1109/lra.2024.3512374","pdf_url":null,"source":{"id":"https://openalex.org/S4210169774","display_name":"IEEE Robotics and Automation Letters","issn_l":"2377-3766","issn":["2377-3766"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Robotics and Automation Letters","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5007539078","display_name":"Federico Pizarro Bejarano","orcid":"https://orcid.org/0000-0003-4566-6466"},"institutions":[{"id":"https://openalex.org/I185261750","display_name":"University of Toronto","ror":"https://ror.org/03dbr7087","country_code":"CA","type":"education","lineage":["https://openalex.org/I185261750"]},{"id":"https://openalex.org/I4210116723","display_name":"Robotics Research (United States)","ror":"https://ror.org/020w2fr77","country_code":"US","type":"company","lineage":["https://openalex.org/I4210116723"]}],"countries":["CA","US"],"is_corresponding":true,"raw_author_name":"Federico Pizarro Bejarano","raw_affiliation_strings":["Learning Systems and Robotics Lab, University of Toronto, Toronto, ON, Canada","Learning Systems and Robotics Lab (www.learnsyslab.org), University of Toronto, Canada"],"affiliations":[{"raw_affiliation_string":"Learning Systems and Robotics Lab, University of Toronto, Toronto, ON, Canada","institution_ids":["https://openalex.org/I4210116723"]},{"raw_affiliation_string":"Learning Systems and Robotics Lab (www.learnsyslab.org), University of Toronto, Canada","institution_ids":["https://openalex.org/I185261750"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5011540585","display_name":"Lukas Brunke","orcid":"https://orcid.org/0000-0002-9893-9889"},"institutions":[{"id":"https://openalex.org/I185261750","display_name":"University of Toronto","ror":"https://ror.org/03dbr7087","country_code":"CA","type":"education","lineage":["https://openalex.org/I185261750"]},{"id":"https://openalex.org/I4210116723","display_name":"Robotics Research (United States)","ror":"https://ror.org/020w2fr77","country_code":"US","type":"company","lineage":["https://openalex.org/I4210116723"]}],"countries":["CA","US"],"is_corresponding":false,"raw_author_name":"Lukas Brunke","raw_affiliation_strings":["Learning Systems and Robotics Lab, University of Toronto, Toronto, ON, Canada","Learning Systems and Robotics Lab (www.learnsyslab.org), University of Toronto, Canada"],"affiliations":[{"raw_affiliation_string":"Learning Systems and Robotics Lab, University of Toronto, Toronto, ON, Canada","institution_ids":["https://openalex.org/I4210116723"]},{"raw_affiliation_string":"Learning Systems and Robotics Lab (www.learnsyslab.org), University of Toronto, Canada","institution_ids":["https://openalex.org/I185261750"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5052147335","display_name":"Angela P. Schoellig","orcid":"https://orcid.org/0000-0003-4012-4668"},"institutions":[{"id":"https://openalex.org/I185261750","display_name":"University of Toronto","ror":"https://ror.org/03dbr7087","country_code":"CA","type":"education","lineage":["https://openalex.org/I185261750"]},{"id":"https://openalex.org/I4210116723","display_name":"Robotics Research (United States)","ror":"https://ror.org/020w2fr77","country_code":"US","type":"company","lineage":["https://openalex.org/I4210116723"]}],"countries":["CA","US"],"is_corresponding":false,"raw_author_name":"Angela P. Schoellig","raw_affiliation_strings":["Learning Systems and Robotics Lab, University of Toronto, Toronto, ON, Canada","Learning Systems and Robotics Lab (www.learnsyslab.org), University of Toronto, Canada"],"affiliations":[{"raw_affiliation_string":"Learning Systems and Robotics Lab, University of Toronto, Toronto, ON, Canada","institution_ids":["https://openalex.org/I4210116723"]},{"raw_affiliation_string":"Learning Systems and Robotics Lab (www.learnsyslab.org), University of Toronto, Canada","institution_ids":["https://openalex.org/I185261750"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5007539078"],"corresponding_institution_ids":["https://openalex.org/I185261750","https://openalex.org/I4210116723"],"apc_list":null,"apc_paid":null,"fwci":3.529,"has_fulltext":false,"cited_by_count":8,"citation_normalized_percentile":{"value":0.93138654,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":96,"max":99},"biblio":{"volume":"10","issue":"1","first_page":"788","last_page":"795"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10809","display_name":"Occupational Health and Safety Research","score":0.8551999926567078,"subfield":{"id":"https://openalex.org/subfields/3614","display_name":"Radiological and Ultrasound Technology"},"field":{"id":"https://openalex.org/fields/36","display_name":"Health Professions"},"domain":{"id":"https://openalex.org/domains/4","display_name":"Health Sciences"}},"topics":[{"id":"https://openalex.org/T10809","display_name":"Occupational Health and Safety Research","score":0.8551999926567078,"subfield":{"id":"https://openalex.org/subfields/3614","display_name":"Radiological and Ultrasound Technology"},"field":{"id":"https://openalex.org/fields/36","display_name":"Health Professions"},"domain":{"id":"https://openalex.org/domains/4","display_name":"Health Sciences"}},{"id":"https://openalex.org/T12423","display_name":"Software Reliability and Analysis Research","score":0.7885000109672546,"subfield":{"id":"https://openalex.org/subfields/1712","display_name":"Software"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.7696999907493591,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/training","display_name":"Training (meteorology)","score":0.7037491202354431},{"id":"https://openalex.org/keywords/sample","display_name":"Sample (material)","score":0.6909734010696411},{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.6115804314613342},{"id":"https://openalex.org/keywords/reinforcement","display_name":"Reinforcement","score":0.5900116562843323},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.447429895401001},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.3370871841907501},{"id":"https://openalex.org/keywords/psychology","display_name":"Psychology","score":0.20441138744354248},{"id":"https://openalex.org/keywords/chromatography","display_name":"Chromatography","score":0.07052290439605713},{"id":"https://openalex.org/keywords/social-psychology","display_name":"Social psychology","score":0.06356969475746155},{"id":"https://openalex.org/keywords/geography","display_name":"Geography","score":0.05110275745391846},{"id":"https://openalex.org/keywords/chemistry","display_name":"Chemistry","score":0.04093527793884277}],"concepts":[{"id":"https://openalex.org/C2777211547","wikidata":"https://www.wikidata.org/wiki/Q17141490","display_name":"Training (meteorology)","level":2,"score":0.7037491202354431},{"id":"https://openalex.org/C198531522","wikidata":"https://www.wikidata.org/wiki/Q485146","display_name":"Sample (material)","level":2,"score":0.6909734010696411},{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.6115804314613342},{"id":"https://openalex.org/C67203356","wikidata":"https://www.wikidata.org/wiki/Q1321905","display_name":"Reinforcement","level":2,"score":0.5900116562843323},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.447429895401001},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.3370871841907501},{"id":"https://openalex.org/C15744967","wikidata":"https://www.wikidata.org/wiki/Q9418","display_name":"Psychology","level":0,"score":0.20441138744354248},{"id":"https://openalex.org/C43617362","wikidata":"https://www.wikidata.org/wiki/Q170050","display_name":"Chromatography","level":1,"score":0.07052290439605713},{"id":"https://openalex.org/C77805123","wikidata":"https://www.wikidata.org/wiki/Q161272","display_name":"Social psychology","level":1,"score":0.06356969475746155},{"id":"https://openalex.org/C205649164","wikidata":"https://www.wikidata.org/wiki/Q1071","display_name":"Geography","level":0,"score":0.05110275745391846},{"id":"https://openalex.org/C185592680","wikidata":"https://www.wikidata.org/wiki/Q2329","display_name":"Chemistry","level":0,"score":0.04093527793884277},{"id":"https://openalex.org/C153294291","wikidata":"https://www.wikidata.org/wiki/Q25261","display_name":"Meteorology","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/lra.2024.3512374","is_oa":false,"landing_page_url":"https://doi.org/10.1109/lra.2024.3512374","pdf_url":null,"source":{"id":"https://openalex.org/S4210169774","display_name":"IEEE Robotics and Automation Letters","issn_l":"2377-3766","issn":["2377-3766"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Robotics and Automation Letters","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":35,"referenced_works":["https://openalex.org/W2171505650","https://openalex.org/W2257979135","https://openalex.org/W2792767783","https://openalex.org/W2887709062","https://openalex.org/W2963293747","https://openalex.org/W2966735560","https://openalex.org/W2968945909","https://openalex.org/W3013679533","https://openalex.org/W3025460448","https://openalex.org/W3142005321","https://openalex.org/W3195968524","https://openalex.org/W3207305612","https://openalex.org/W4206497039","https://openalex.org/W4214717370","https://openalex.org/W4293255219","https://openalex.org/W4293370597","https://openalex.org/W4307876719","https://openalex.org/W4312433875","https://openalex.org/W4313316112","https://openalex.org/W4321225876","https://openalex.org/W4324116431","https://openalex.org/W4352977012","https://openalex.org/W4387123822","https://openalex.org/W4389934657","https://openalex.org/W4391021089","https://openalex.org/W4391741307","https://openalex.org/W6638088447","https://openalex.org/W6725478704","https://openalex.org/W6737893269","https://openalex.org/W6741002519","https://openalex.org/W6747790125","https://openalex.org/W6810709996","https://openalex.org/W6838633502","https://openalex.org/W6855851608","https://openalex.org/W6856428636"],"related_works":["https://openalex.org/W4310083477","https://openalex.org/W2328553770","https://openalex.org/W2920061524","https://openalex.org/W1977959518","https://openalex.org/W2038908348","https://openalex.org/W2107890255","https://openalex.org/W2106552856","https://openalex.org/W2145821588","https://openalex.org/W2086122291","https://openalex.org/W1987513656"],"abstract_inverted_index":{"Reinforcement":[0],"learning":[1],"(RL)":[2],"controllers":[3,20,62],"are":[4],"flexible":[5],"and":[6,38,45,98,112,119,129,135],"performant":[7],"but":[8],"rarely":[9],"guarantee":[10],"safety.":[11],"Safety":[12],"filters":[13,26,134],"impart":[14],"hard":[15],"safety":[16,25,40,57,82,94,133],"guarantees":[17],"to":[18,32,54,76,78],"RL":[19,61,74,92],"while":[21],"maintaining":[22],"flexibility.":[23],"However,":[24],"can":[27],"cause":[28],"undesired":[29],"behaviours":[30],"due":[31],"the":[33,36,39,56,73,81],"separation":[34],"between":[35],"controller":[37,75],"filter,":[41],"often":[42],"degrading":[43],"performance":[44],"robustness.":[46],"In":[47],"this":[48],"letter,":[49],"we":[50],"analyze":[51],"several":[52],"modifications":[53,71,111],"incorporating":[55],"filter":[58],"in":[59],"training":[60,91,110],"rather":[63],"than":[64],"solely":[65],"applying":[66],"it":[67],"during":[68],"evaluation.":[69],"The":[70],"allow":[72],"learn":[77],"account":[79],"for":[80,127],"filter.":[83],"This":[84],"letter":[85],"presents":[86],"a":[87,102,125],"comprehensive":[88],"analysis":[89],"of":[90],"with":[93,101],"filters,":[95],"featuring":[96],"simulated":[97],"real-world":[99],"experiments":[100],"Crazyflie":[103],"2.0":[104],"drone.":[105],"We":[106],"examine":[107],"how":[108],"various":[109],"hyperparameters":[113],"impact":[114],"performance,":[115],"sample":[116],"efficiency,":[117],"safety,":[118],"chattering.":[120],"Our":[121],"findings":[122],"serve":[123],"as":[124],"guide":[126],"practitioners":[128],"researchers":[130],"focused":[131],"on":[132],"safe":[136],"RL.":[137]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":7}],"updated_date":"2026-04-09T08:11:56.329763","created_date":"2025-10-10T00:00:00"}
