{"id":"https://openalex.org/W4416748670","doi":"https://doi.org/10.1109/iros60139.2025.11247738","title":"Towards Safe Reinforcement Learning with Reduced Conservativeness: A Case Study on Drone Flight Control","display_name":"Towards Safe Reinforcement Learning with Reduced Conservativeness: A Case Study on Drone Flight Control","publication_year":2025,"publication_date":"2025-10-19","ids":{"openalex":"https://openalex.org/W4416748670","doi":"https://doi.org/10.1109/iros60139.2025.11247738"},"language":"en","primary_location":{"id":"doi:10.1109/iros60139.2025.11247738","is_oa":false,"landing_page_url":"https://doi.org/10.1109/iros60139.2025.11247738","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 IEEE/RSJ International Conference on Intelligent Robots and Systems (IROS)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5027451948","display_name":"Loizos Hadjiloizou","orcid":null},"institutions":[{"id":"https://openalex.org/I86987016","display_name":"KTH Royal Institute of Technology","ror":"https://ror.org/026vcq606","country_code":"SE","type":"education","lineage":["https://openalex.org/I86987016"]}],"countries":["SE"],"is_corresponding":true,"raw_author_name":"Loizos Hadjiloizou","raw_affiliation_strings":["KTH Royal Institute of Technology,Division of RPL, EECS,Stockholm,Sweden,11428"],"affiliations":[{"raw_affiliation_string":"KTH Royal Institute of Technology,Division of RPL, EECS,Stockholm,Sweden,11428","institution_ids":["https://openalex.org/I86987016"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5020344044","display_name":"Michael C. Welle","orcid":"https://orcid.org/0000-0003-3827-3824"},"institutions":[{"id":"https://openalex.org/I86987016","display_name":"KTH Royal Institute of Technology","ror":"https://ror.org/026vcq606","country_code":"SE","type":"education","lineage":["https://openalex.org/I86987016"]}],"countries":["SE"],"is_corresponding":false,"raw_author_name":"Michael C. Welle","raw_affiliation_strings":["KTH Royal Institute of Technology,Division of RPL, EECS,Stockholm,Sweden,11428"],"affiliations":[{"raw_affiliation_string":"KTH Royal Institute of Technology,Division of RPL, EECS,Stockholm,Sweden,11428","institution_ids":["https://openalex.org/I86987016"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5067144254","display_name":"Hang Yin","orcid":"https://orcid.org/0000-0002-3599-440X"},"institutions":[{"id":"https://openalex.org/I124055696","display_name":"University of Copenhagen","ror":"https://ror.org/035b05819","country_code":"DK","type":"education","lineage":["https://openalex.org/I124055696"]}],"countries":["DK"],"is_corresponding":false,"raw_author_name":"Hang Yin","raw_affiliation_strings":["University of Copenhagen,Department of Computer Science,Copenhagen,Denmark,2100"],"affiliations":[{"raw_affiliation_string":"University of Copenhagen,Department of Computer Science,Copenhagen,Denmark,2100","institution_ids":["https://openalex.org/I124055696"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5074819656","display_name":"Danica Kragic","orcid":null},"institutions":[{"id":"https://openalex.org/I86987016","display_name":"KTH Royal Institute of Technology","ror":"https://ror.org/026vcq606","country_code":"SE","type":"education","lineage":["https://openalex.org/I86987016"]}],"countries":["SE"],"is_corresponding":false,"raw_author_name":"Danica Kragic","raw_affiliation_strings":["KTH Royal Institute of Technology,Division of RPL, EECS,Stockholm,Sweden,11428"],"affiliations":[{"raw_affiliation_string":"KTH Royal Institute of Technology,Division of RPL, EECS,Stockholm,Sweden,11428","institution_ids":["https://openalex.org/I86987016"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5027451948"],"corresponding_institution_ids":["https://openalex.org/I86987016"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.19291647,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"14870","last_page":"14876"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.29339998960494995,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.29339998960494995,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12794","display_name":"Adaptive Dynamic Programming Control","score":0.18629999458789825,"subfield":{"id":"https://openalex.org/subfields/1703","display_name":"Computational Theory and Mathematics"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10586","display_name":"Robotic Path Planning Algorithms","score":0.07349999994039536,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/drone","display_name":"Drone","score":0.8023999929428101},{"id":"https://openalex.org/keywords/adaptability","display_name":"Adaptability","score":0.73580002784729},{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.7157999873161316},{"id":"https://openalex.org/keywords/robustness","display_name":"Robustness (evolution)","score":0.6452000141143799},{"id":"https://openalex.org/keywords/reachability","display_name":"Reachability","score":0.5979999899864197},{"id":"https://openalex.org/keywords/active-safety","display_name":"Active safety","score":0.4401000142097473},{"id":"https://openalex.org/keywords/formal-methods","display_name":"Formal methods","score":0.4311000108718872},{"id":"https://openalex.org/keywords/system-safety","display_name":"System safety","score":0.3785000145435333}],"concepts":[{"id":"https://openalex.org/C59519942","wikidata":"https://www.wikidata.org/wiki/Q650665","display_name":"Drone","level":2,"score":0.8023999929428101},{"id":"https://openalex.org/C177606310","wikidata":"https://www.wikidata.org/wiki/Q5674297","display_name":"Adaptability","level":2,"score":0.73580002784729},{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.7157999873161316},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6905999779701233},{"id":"https://openalex.org/C63479239","wikidata":"https://www.wikidata.org/wiki/Q7353546","display_name":"Robustness (evolution)","level":3,"score":0.6452000141143799},{"id":"https://openalex.org/C136643341","wikidata":"https://www.wikidata.org/wiki/Q1361526","display_name":"Reachability","level":2,"score":0.5979999899864197},{"id":"https://openalex.org/C127757376","wikidata":"https://www.wikidata.org/wiki/Q2056514","display_name":"Active safety","level":2,"score":0.4401000142097473},{"id":"https://openalex.org/C75606506","wikidata":"https://www.wikidata.org/wiki/Q1049183","display_name":"Formal methods","level":2,"score":0.4311000108718872},{"id":"https://openalex.org/C132835097","wikidata":"https://www.wikidata.org/wiki/Q7663745","display_name":"System safety","level":2,"score":0.3785000145435333},{"id":"https://openalex.org/C2775924081","wikidata":"https://www.wikidata.org/wiki/Q55608371","display_name":"Control (management)","level":2,"score":0.37450000643730164},{"id":"https://openalex.org/C115901376","wikidata":"https://www.wikidata.org/wiki/Q184199","display_name":"Automation","level":2,"score":0.33230000734329224},{"id":"https://openalex.org/C133731056","wikidata":"https://www.wikidata.org/wiki/Q4917288","display_name":"Control engineering","level":1,"score":0.32829999923706055},{"id":"https://openalex.org/C2986087404","wikidata":"https://www.wikidata.org/wiki/Q15946010","display_name":"Online learning","level":2,"score":0.3183000087738037},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.31380000710487366},{"id":"https://openalex.org/C111498074","wikidata":"https://www.wikidata.org/wiki/Q173326","display_name":"Formal verification","level":2,"score":0.30070000886917114},{"id":"https://openalex.org/C17500928","wikidata":"https://www.wikidata.org/wiki/Q959968","display_name":"Control system","level":2,"score":0.2962999939918518},{"id":"https://openalex.org/C36299963","wikidata":"https://www.wikidata.org/wiki/Q1369844","display_name":"Observability","level":2,"score":0.2799000144004822},{"id":"https://openalex.org/C2776654903","wikidata":"https://www.wikidata.org/wiki/Q2601463","display_name":"SAFER","level":2,"score":0.2770000100135803},{"id":"https://openalex.org/C2780598303","wikidata":"https://www.wikidata.org/wiki/Q65921492","display_name":"Flexibility (engineering)","level":2,"score":0.27489998936653137},{"id":"https://openalex.org/C110251889","wikidata":"https://www.wikidata.org/wiki/Q1569697","display_name":"Model checking","level":2,"score":0.2700999975204468},{"id":"https://openalex.org/C538199239","wikidata":"https://www.wikidata.org/wiki/Q640853","display_name":"Aviation safety","level":3,"score":0.26499998569488525},{"id":"https://openalex.org/C112930515","wikidata":"https://www.wikidata.org/wiki/Q4389547","display_name":"Risk analysis (engineering)","level":1,"score":0.25589999556541443},{"id":"https://openalex.org/C2776544517","wikidata":"https://www.wikidata.org/wiki/Q189447","display_name":"Unexpected events","level":2,"score":0.2554999887943268},{"id":"https://openalex.org/C2989181227","wikidata":"https://www.wikidata.org/wiki/Q640853","display_name":"Flight safety","level":2,"score":0.25459998846054077}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1109/iros60139.2025.11247738","is_oa":false,"landing_page_url":"https://doi.org/10.1109/iros60139.2025.11247738","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 IEEE/RSJ International Conference on Intelligent Robots and Systems (IROS)","raw_type":"proceedings-article"},{"id":"pmh:oai:pure.atira.dk:openaire_cris_publications/16ba2381-9d5f-4a02-8e19-72aa89a5cfd1","is_oa":false,"landing_page_url":"https://researchprofiles.ku.dk/da/publications/16ba2381-9d5f-4a02-8e19-72aa89a5cfd1","pdf_url":null,"source":{"id":"https://openalex.org/S4306401983","display_name":"Research at the University of Copenhagen (University of Copenhagen)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I124055696","host_organization_name":"University of Copenhagen","host_organization_lineage":["https://openalex.org/I124055696"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Hadjiloizou , L , Welle , M C , Yin , H & Kragic , D 2025 , Towards Safe Reinforcement Learning with Reduced Conservativeness : A Case Study on Drone Flight Control . in IROS 2025 - 2025 IEEE/RSJ International Conference on Intelligent Robots and Systems, Conference Proceedings . IEEE , IEEE International Conference on Intelligent Robots and Systems , pp. 14870-14876 , 2025 IEEE/RSJ International Conference on Intelligent Robots and Systems, IROS 2025 , Hangzhou , China , 19/10/2025 . https://doi.org/10.1109/IROS60139.2025.11247738","raw_type":"contributionToPeriodical"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[{"id":"https://openalex.org/F4320334678","display_name":"European Research Council","ror":"https://ror.org/0472cxd90"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":15,"referenced_works":["https://openalex.org/W134786152","https://openalex.org/W1569692898","https://openalex.org/W2003696022","https://openalex.org/W2053572490","https://openalex.org/W2127192854","https://openalex.org/W2758653969","https://openalex.org/W2901402208","https://openalex.org/W2963525569","https://openalex.org/W2964040381","https://openalex.org/W3152878473","https://openalex.org/W3175238547","https://openalex.org/W3210364203","https://openalex.org/W4285818823","https://openalex.org/W4293243623","https://openalex.org/W4324116431"],"related_works":[],"abstract_inverted_index":{"Incorporating":[0],"formal":[1,22,80],"methods":[2,81],"into":[3],"reinforcement":[4],"learning":[5,28,142],"(RL)":[6],"has":[7],"the":[8,13,19,25,59,62,77,94,107,117,128,137,148,156,169,172],"potential":[9],"to":[10,37,49,64,110,132],"result":[11],"in":[12,119],"best":[14],"of":[15,21,30,53,61,79,96,164,171],"both":[16],"worlds,":[17],"combining":[18],"robustness":[20],"guarantees":[23,150],"with":[24,141],"adaptability":[26],"and":[27,40,91,136,146],"capabilities":[29],"RL,":[31],"though":[32],"careful":[33],"design":[34],"is":[35,130,139],"needed":[36],"balance":[38],"safety":[39,60,95,108,149,170],"exploration.":[41],"In":[42],"this":[43,51],"work,":[44],"we":[45,68],"propose":[46],"a":[47,70,84,97,112,120,125,159],"framework":[48,118,138,157],"mitigate":[50],"loss":[52],"exploration":[54],"while":[55],"still":[56],"allowing":[57],"for":[58,106],"system":[63],"be":[65],"ensured.":[66],"Specifically,":[67],"introduce":[69],"less":[71,160],"restrictive":[72,161],"method":[73],"that":[74,155],"can":[75],"reduce":[76],"conservativeness":[78],"by":[82],"refining":[83],"disturbance":[85],"model":[86],"using":[87,100],"online":[88,145,162],"collected":[89],"data":[90],"it":[92],"evaluates":[93],"learning-based":[98,165],"controller,":[99],"computationally":[101],"efficient":[102],"zonotopic":[103],"reachability":[104],"analysis":[105,109],"facilitate":[111],"real-time":[113],"implementation.":[114],"We":[115],"validate":[116],"real-world":[121],"drone":[122,129],"flight":[123],"through":[124],"canyon,":[126],"where":[127],"subjected":[131],"unknown":[133],"external":[134],"disturbances":[135,144],"tasked":[140],"those":[143],"adjusting":[147],"accordingly.":[151],"The":[152],"results":[153],"show":[154],"enables":[158],"training":[163],"controllers":[166],"without":[167],"compromising":[168],"system.":[173]},"counts_by_year":[],"updated_date":"2026-04-09T08:11:56.329763","created_date":"2025-11-28T00:00:00"}
