{"id":"https://openalex.org/W4416749144","doi":"https://doi.org/10.1109/iros60139.2025.11246408","title":"Disturbance Observer-based Control Barrier Functions with Residual Model Learning for Safe Reinforcement Learning","display_name":"Disturbance Observer-based Control Barrier Functions with Residual Model Learning for Safe Reinforcement Learning","publication_year":2025,"publication_date":"2025-10-19","ids":{"openalex":"https://openalex.org/W4416749144","doi":"https://doi.org/10.1109/iros60139.2025.11246408"},"language":null,"primary_location":{"id":"doi:10.1109/iros60139.2025.11246408","is_oa":false,"landing_page_url":"https://doi.org/10.1109/iros60139.2025.11246408","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 IEEE/RSJ International Conference on Intelligent Robots and Systems (IROS)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5024134293","display_name":"Dvij Kalaria","orcid":"https://orcid.org/0009-0003-3610-9921"},"institutions":[{"id":"https://openalex.org/I74973139","display_name":"Carnegie Mellon University","ror":"https://ror.org/05x2bcf33","country_code":"US","type":"education","lineage":["https://openalex.org/I74973139"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Dvij Kalaria","raw_affiliation_strings":["Carnegie Mellon University,Robotics Institute"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Carnegie Mellon University,Robotics Institute","institution_ids":["https://openalex.org/I74973139"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5103249328","display_name":"Qin Lin","orcid":"https://orcid.org/0000-0002-5703-9112"},"institutions":[{"id":"https://openalex.org/I44461941","display_name":"University of Houston","ror":"https://ror.org/048sx0r50","country_code":"US","type":"education","lineage":["https://openalex.org/I44461941"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Qin Lin","raw_affiliation_strings":["University of Houston,Department of Engineering Technology, Department of Electrical and Computer Engineering"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"University of Houston,Department of Engineering Technology, Department of Electrical and Computer Engineering","institution_ids":["https://openalex.org/I44461941"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5015076162","display_name":"John M. Dolan","orcid":"https://orcid.org/0000-0003-2062-100X"},"institutions":[{"id":"https://openalex.org/I74973139","display_name":"Carnegie Mellon University","ror":"https://ror.org/05x2bcf33","country_code":"US","type":"education","lineage":["https://openalex.org/I74973139"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"John M. Dolan","raw_affiliation_strings":["Carnegie Mellon University,Robotics Institute"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Carnegie Mellon University,Robotics Institute","institution_ids":["https://openalex.org/I74973139"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.1749713,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"2345","last_page":"2351"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.6128000020980835,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.6128000020980835,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11689","display_name":"Adversarial Robustness in Machine Learning","score":0.22830000519752502,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11099","display_name":"Autonomous Vehicle Technology and Safety","score":0.03220000118017197,"subfield":{"id":"https://openalex.org/subfields/2203","display_name":"Automotive Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.715399980545044},{"id":"https://openalex.org/keywords/residual","display_name":"Residual","score":0.6251999735832214},{"id":"https://openalex.org/keywords/benchmark","display_name":"Benchmark (surveying)","score":0.5755000114440918},{"id":"https://openalex.org/keywords/disturbance","display_name":"Disturbance (geology)","score":0.5052000284194946},{"id":"https://openalex.org/keywords/control","display_name":"Control (management)","score":0.4399000108242035},{"id":"https://openalex.org/keywords/control-theory","display_name":"Control theory (sociology)","score":0.42719998955726624},{"id":"https://openalex.org/keywords/point","display_name":"Point (geometry)","score":0.413100004196167},{"id":"https://openalex.org/keywords/robot","display_name":"Robot","score":0.3546999990940094}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7184000015258789},{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.715399980545044},{"id":"https://openalex.org/C155512373","wikidata":"https://www.wikidata.org/wiki/Q287450","display_name":"Residual","level":2,"score":0.6251999735832214},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.5755000114440918},{"id":"https://openalex.org/C2777601987","wikidata":"https://www.wikidata.org/wiki/Q5283581","display_name":"Disturbance (geology)","level":2,"score":0.5052000284194946},{"id":"https://openalex.org/C2775924081","wikidata":"https://www.wikidata.org/wiki/Q55608371","display_name":"Control (management)","level":2,"score":0.4399000108242035},{"id":"https://openalex.org/C47446073","wikidata":"https://www.wikidata.org/wiki/Q5165890","display_name":"Control theory (sociology)","level":3,"score":0.42719998955726624},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4223000109195709},{"id":"https://openalex.org/C28719098","wikidata":"https://www.wikidata.org/wiki/Q44946","display_name":"Point (geometry)","level":2,"score":0.413100004196167},{"id":"https://openalex.org/C90509273","wikidata":"https://www.wikidata.org/wiki/Q11012","display_name":"Robot","level":2,"score":0.3546999990940094},{"id":"https://openalex.org/C2780704645","wikidata":"https://www.wikidata.org/wiki/Q9251458","display_name":"Observer (physics)","level":2,"score":0.34700000286102295},{"id":"https://openalex.org/C31531917","wikidata":"https://www.wikidata.org/wiki/Q915157","display_name":"Robust control","level":3,"score":0.34360000491142273},{"id":"https://openalex.org/C28427503","wikidata":"https://www.wikidata.org/wiki/Q13580300","display_name":"Internal model","level":3,"score":0.3098999857902527},{"id":"https://openalex.org/C112972136","wikidata":"https://www.wikidata.org/wiki/Q7595718","display_name":"Stability (learning theory)","level":2,"score":0.2854999899864197},{"id":"https://openalex.org/C79487989","wikidata":"https://www.wikidata.org/wiki/Q934680","display_name":"Vehicle dynamics","level":2,"score":0.28360000252723694},{"id":"https://openalex.org/C77405623","wikidata":"https://www.wikidata.org/wiki/Q598451","display_name":"System dynamics","level":2,"score":0.27730000019073486},{"id":"https://openalex.org/C133731056","wikidata":"https://www.wikidata.org/wiki/Q4917288","display_name":"Control engineering","level":1,"score":0.26980000734329224},{"id":"https://openalex.org/C117619785","wikidata":"https://www.wikidata.org/wiki/Q6094414","display_name":"Iterative learning control","level":3,"score":0.26429998874664307},{"id":"https://openalex.org/C91575142","wikidata":"https://www.wikidata.org/wiki/Q1971426","display_name":"Optimal control","level":2,"score":0.25029999017715454}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/iros60139.2025.11246408","is_oa":false,"landing_page_url":"https://doi.org/10.1109/iros60139.2025.11246408","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 IEEE/RSJ International Conference on Intelligent Robots and Systems (IROS)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":26,"referenced_works":["https://openalex.org/W1990851787","https://openalex.org/W2145339207","https://openalex.org/W2158782408","https://openalex.org/W2230652337","https://openalex.org/W2605102758","https://openalex.org/W2767050701","https://openalex.org/W2911087563","https://openalex.org/W2963184939","https://openalex.org/W2966735560","https://openalex.org/W2968945909","https://openalex.org/W3035981335","https://openalex.org/W3045838296","https://openalex.org/W3121342653","https://openalex.org/W3127561923","https://openalex.org/W3175254947","https://openalex.org/W3195968524","https://openalex.org/W3211352205","https://openalex.org/W4313030892","https://openalex.org/W4313166442","https://openalex.org/W4313196885","https://openalex.org/W4366748330","https://openalex.org/W4383109238","https://openalex.org/W4388903822","https://openalex.org/W4401414258","https://openalex.org/W4401414876","https://openalex.org/W4403677966"],"related_works":[],"abstract_inverted_index":{"Reinforcement":[0],"learning":[1,155],"(RL)":[2],"agents":[3],"need":[4],"to":[5,9,52],"explore":[6],"their":[7],"environment":[8],"learn":[10],"optimal":[11],"behaviors":[12],"and":[13,85,112,124,138],"achieve":[14],"maximum":[15],"rewards.":[16],"However,":[17,59],"exploration":[18],"can":[19,146],"be":[20],"risky":[21],"when":[22],"training":[23,31],"RL":[24,57,99],"directly":[25],"on":[26,67,102,132,141],"real":[27],"systems,":[28],"while":[29],"simulation-based":[30],"introduces":[32],"the":[33,37,60,81,107,110,133,164],"tricky":[34],"issue":[35],"of":[36,64,83,121,166],"sim-to-real":[38],"gap.":[39],"Recent":[40],"approaches":[41,149],"have":[42],"leveraged":[43],"safety":[44,62],"filters,":[45],"such":[46,88],"as":[47,89],"control":[48],"barrier":[49],"functions":[50],"(CBFs),":[51],"penalize":[53],"unsafe":[54],"actions":[55],"during":[56],"training.":[58],"strong":[61],"guarantees":[63],"CBFs":[65],"rely":[66],"a":[68,96,103,119,157,170],"precise":[69],"dynamic":[70,114],"model.":[71],"In":[72,91],"practice,":[73],"uncertainties":[74],"always":[75],"exist,":[76],"including":[77],"internal":[78],"disturbances":[79,87],"from":[80],"errors":[82],"dynamics":[84],"external":[86],"wind.":[90],"this":[92],"work,":[93],"we":[94,145],"propose":[95],"novel":[97],"safe":[98],"framework":[100,168],"built":[101],"robust":[104],"CBF,":[105],"where":[106,144],"discrepancy":[108],"between":[109],"nominal":[111],"true":[113],"models":[115],"is":[116],"quantified":[117],"through":[118],"combination":[120],"disturbance":[122,158],"observation":[123],"residual":[125,153],"model":[126,154],"learning.":[127],"We":[128,161],"demonstrate":[129],"our":[130,167],"results":[131],"Safety-gym":[134],"benchmark":[135],"for":[136],"Point":[137],"Car":[139],"robots":[140],"all":[142],"tasks":[143],"outperform":[147],"state-of-the-art":[148],"that":[150],"use":[151],"only":[152],"or":[156],"observer":[159],"(DOB).":[160],"further":[162],"validate":[163],"efficacy":[165],"using":[169],"physical":[171],"F1/10":[172],"racing":[173],"car.Videos:":[174],"https://sites.google.com/view/res-dob-cbf-rl":[175]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2025-11-28T00:00:00"}