{"id":"https://openalex.org/W4389665448","doi":"https://doi.org/10.1109/iros55552.2023.10341973","title":"Dual Variable Actor-Critic for Adaptive Safe Reinforcement Learning","display_name":"Dual Variable Actor-Critic for Adaptive Safe Reinforcement Learning","publication_year":2023,"publication_date":"2023-10-01","ids":{"openalex":"https://openalex.org/W4389665448","doi":"https://doi.org/10.1109/iros55552.2023.10341973"},"language":"en","primary_location":{"id":"doi:10.1109/iros55552.2023.10341973","is_oa":false,"landing_page_url":"http://dx.doi.org/10.1109/iros55552.2023.10341973","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2023 IEEE/RSJ International Conference on Intelligent Robots and Systems (IROS)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5101705399","display_name":"Junseo Lee","orcid":"https://orcid.org/0009-0009-4953-1383"},"institutions":[{"id":"https://openalex.org/I139264467","display_name":"Seoul National University","ror":"https://ror.org/04h9pn542","country_code":"KR","type":"education","lineage":["https://openalex.org/I139264467"]}],"countries":["KR"],"is_corresponding":true,"raw_author_name":"Junseo Lee","raw_affiliation_strings":["Graduate School of Artificial Intelligence (GSAI) and ASRI, Seoul National University,Seoul,Korea","Graduate School of Artificial Intelligence (GSAI) and ASRI, Seoul National University, Seoul, Korea"],"affiliations":[{"raw_affiliation_string":"Graduate School of Artificial Intelligence (GSAI) and ASRI, Seoul National University,Seoul,Korea","institution_ids":["https://openalex.org/I139264467"]},{"raw_affiliation_string":"Graduate School of Artificial Intelligence (GSAI) and ASRI, Seoul National University, Seoul, Korea","institution_ids":["https://openalex.org/I139264467"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5000066514","display_name":"Jaeseok Heo","orcid":"https://orcid.org/0000-0003-1678-6573"},"institutions":[{"id":"https://openalex.org/I139264467","display_name":"Seoul National University","ror":"https://ror.org/04h9pn542","country_code":"KR","type":"education","lineage":["https://openalex.org/I139264467"]}],"countries":["KR"],"is_corresponding":false,"raw_author_name":"Jaeseok Heo","raw_affiliation_strings":["Seoul National University,Department of Electrical and Computer Enginnering and ASRI,Seoul,Korea","Department of Electrical and Computer Enginnering and ASRI, Seoul National University, Seoul, Korea"],"affiliations":[{"raw_affiliation_string":"Seoul National University,Department of Electrical and Computer Enginnering and ASRI,Seoul,Korea","institution_ids":["https://openalex.org/I139264467"]},{"raw_affiliation_string":"Department of Electrical and Computer Enginnering and ASRI, Seoul National University, Seoul, Korea","institution_ids":["https://openalex.org/I139264467"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5044874311","display_name":"Dohyeong Kim","orcid":"https://orcid.org/0000-0003-0788-6089"},"institutions":[{"id":"https://openalex.org/I139264467","display_name":"Seoul National University","ror":"https://ror.org/04h9pn542","country_code":"KR","type":"education","lineage":["https://openalex.org/I139264467"]}],"countries":["KR"],"is_corresponding":false,"raw_author_name":"Dohyeong Kim","raw_affiliation_strings":["Seoul National University,Department of Electrical and Computer Enginnering and ASRI,Seoul,Korea","Department of Electrical and Computer Enginnering and ASRI, Seoul National University, Seoul, Korea"],"affiliations":[{"raw_affiliation_string":"Seoul National University,Department of Electrical and Computer Enginnering and ASRI,Seoul,Korea","institution_ids":["https://openalex.org/I139264467"]},{"raw_affiliation_string":"Department of Electrical and Computer Enginnering and ASRI, Seoul National University, Seoul, Korea","institution_ids":["https://openalex.org/I139264467"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5024425360","display_name":"Gunmin Lee","orcid":null},"institutions":[{"id":"https://openalex.org/I139264467","display_name":"Seoul National University","ror":"https://ror.org/04h9pn542","country_code":"KR","type":"education","lineage":["https://openalex.org/I139264467"]}],"countries":["KR"],"is_corresponding":false,"raw_author_name":"Gunmin Lee","raw_affiliation_strings":["Seoul National University,Department of Electrical and Computer Enginnering and ASRI,Seoul,Korea","Department of Electrical and Computer Enginnering and ASRI, Seoul National University, Seoul, Korea"],"affiliations":[{"raw_affiliation_string":"Seoul National University,Department of Electrical and Computer Enginnering and ASRI,Seoul,Korea","institution_ids":["https://openalex.org/I139264467"]},{"raw_affiliation_string":"Department of Electrical and Computer Enginnering and ASRI, Seoul National University, Seoul, Korea","institution_ids":["https://openalex.org/I139264467"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5033764106","display_name":"Songhwai Oh","orcid":"https://orcid.org/0000-0002-9781-2018"},"institutions":[{"id":"https://openalex.org/I139264467","display_name":"Seoul National University","ror":"https://ror.org/04h9pn542","country_code":"KR","type":"education","lineage":["https://openalex.org/I139264467"]}],"countries":["KR"],"is_corresponding":false,"raw_author_name":"Songhwai Oh","raw_affiliation_strings":["Graduate School of Artificial Intelligence (GSAI) and ASRI, Seoul National University,Seoul,Korea","Graduate School of Artificial Intelligence (GSAI) and ASRI, Seoul National University, Seoul, Korea","Department of Electrical and Computer Enginnering and ASRI, Seoul National University, Seoul, Korea"],"affiliations":[{"raw_affiliation_string":"Graduate School of Artificial Intelligence (GSAI) and ASRI, Seoul National University,Seoul,Korea","institution_ids":["https://openalex.org/I139264467"]},{"raw_affiliation_string":"Graduate School of Artificial Intelligence (GSAI) and ASRI, Seoul National University, Seoul, Korea","institution_ids":["https://openalex.org/I139264467"]},{"raw_affiliation_string":"Department of Electrical and Computer Enginnering and ASRI, Seoul National University, Seoul, Korea","institution_ids":["https://openalex.org/I139264467"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5101705399"],"corresponding_institution_ids":["https://openalex.org/I139264467"],"apc_list":null,"apc_paid":null,"fwci":0.1748,"has_fulltext":false,"cited_by_count":1,"citation_normalized_percentile":{"value":0.58713731,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":94},"biblio":{"volume":null,"issue":null,"first_page":"7568","last_page":"7573"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9968000054359436,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9968000054359436,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.8429161310195923},{"id":"https://openalex.org/keywords/dual","display_name":"Dual (grammatical number)","score":0.7610407471656799},{"id":"https://openalex.org/keywords/variable","display_name":"Variable (mathematics)","score":0.6764393448829651},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6649898290634155},{"id":"https://openalex.org/keywords/pareto-principle","display_name":"Pareto principle","score":0.6321985721588135},{"id":"https://openalex.org/keywords/mathematical-optimization","display_name":"Mathematical optimization","score":0.5831403136253357},{"id":"https://openalex.org/keywords/state-variable","display_name":"State variable","score":0.4670281410217285},{"id":"https://openalex.org/keywords/function","display_name":"Function (biology)","score":0.4585838317871094},{"id":"https://openalex.org/keywords/argument","display_name":"Argument (complex analysis)","score":0.43712249398231506},{"id":"https://openalex.org/keywords/augmented-lagrangian-method","display_name":"Augmented Lagrangian method","score":0.43588346242904663},{"id":"https://openalex.org/keywords/bellman-equation","display_name":"Bellman equation","score":0.42512738704681396},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.3559545874595642},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.18646642565727234},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.13743895292282104}],"concepts":[{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.8429161310195923},{"id":"https://openalex.org/C2780980858","wikidata":"https://www.wikidata.org/wiki/Q110022","display_name":"Dual (grammatical number)","level":2,"score":0.7610407471656799},{"id":"https://openalex.org/C182365436","wikidata":"https://www.wikidata.org/wiki/Q50701","display_name":"Variable (mathematics)","level":2,"score":0.6764393448829651},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6649898290634155},{"id":"https://openalex.org/C137635306","wikidata":"https://www.wikidata.org/wiki/Q182667","display_name":"Pareto principle","level":2,"score":0.6321985721588135},{"id":"https://openalex.org/C126255220","wikidata":"https://www.wikidata.org/wiki/Q141495","display_name":"Mathematical optimization","level":1,"score":0.5831403136253357},{"id":"https://openalex.org/C129537906","wikidata":"https://www.wikidata.org/wiki/Q7603913","display_name":"State variable","level":2,"score":0.4670281410217285},{"id":"https://openalex.org/C14036430","wikidata":"https://www.wikidata.org/wiki/Q3736076","display_name":"Function (biology)","level":2,"score":0.4585838317871094},{"id":"https://openalex.org/C98184364","wikidata":"https://www.wikidata.org/wiki/Q1780131","display_name":"Argument (complex analysis)","level":2,"score":0.43712249398231506},{"id":"https://openalex.org/C150452318","wikidata":"https://www.wikidata.org/wiki/Q4820432","display_name":"Augmented Lagrangian method","level":2,"score":0.43588346242904663},{"id":"https://openalex.org/C14646407","wikidata":"https://www.wikidata.org/wiki/Q1430750","display_name":"Bellman equation","level":2,"score":0.42512738704681396},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.3559545874595642},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.18646642565727234},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.13743895292282104},{"id":"https://openalex.org/C97355855","wikidata":"https://www.wikidata.org/wiki/Q11473","display_name":"Thermodynamics","level":1,"score":0.0},{"id":"https://openalex.org/C134306372","wikidata":"https://www.wikidata.org/wiki/Q7754","display_name":"Mathematical analysis","level":1,"score":0.0},{"id":"https://openalex.org/C124952713","wikidata":"https://www.wikidata.org/wiki/Q8242","display_name":"Literature","level":1,"score":0.0},{"id":"https://openalex.org/C55493867","wikidata":"https://www.wikidata.org/wiki/Q7094","display_name":"Biochemistry","level":1,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C142362112","wikidata":"https://www.wikidata.org/wiki/Q735","display_name":"Art","level":0,"score":0.0},{"id":"https://openalex.org/C78458016","wikidata":"https://www.wikidata.org/wiki/Q840400","display_name":"Evolutionary biology","level":1,"score":0.0},{"id":"https://openalex.org/C185592680","wikidata":"https://www.wikidata.org/wiki/Q2329","display_name":"Chemistry","level":0,"score":0.0},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/iros55552.2023.10341973","is_oa":false,"landing_page_url":"http://dx.doi.org/10.1109/iros55552.2023.10341973","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2023 IEEE/RSJ International Conference on Intelligent Robots and Systems (IROS)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"score":0.44999998807907104,"display_name":"Reduced inequalities","id":"https://metadata.un.org/sdg/10"}],"awards":[{"id":"https://openalex.org/G4340828026","display_name":null,"funder_award_id":"2019-0-01190","funder_id":"https://openalex.org/F4320324891","funder_display_name":"Iran Telecommunication Research Center"}],"funders":[{"id":"https://openalex.org/F4320324891","display_name":"Iran Telecommunication Research Center","ror":"https://ror.org/01a3g2z22"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":20,"referenced_works":["https://openalex.org/W2027579135","https://openalex.org/W2575705757","https://openalex.org/W2989847975","https://openalex.org/W3118210634","https://openalex.org/W3129424093","https://openalex.org/W3176452384","https://openalex.org/W4285066191","https://openalex.org/W6737893269","https://openalex.org/W6747473740","https://openalex.org/W6754554871","https://openalex.org/W6766952794","https://openalex.org/W6770009701","https://openalex.org/W6774126978","https://openalex.org/W6775522024","https://openalex.org/W6779265984","https://openalex.org/W6780587392","https://openalex.org/W6783988234","https://openalex.org/W6800221206","https://openalex.org/W6804456054","https://openalex.org/W6810667139"],"related_works":["https://openalex.org/W2015060724","https://openalex.org/W2919324478","https://openalex.org/W4400665899","https://openalex.org/W2130120519","https://openalex.org/W2058578573","https://openalex.org/W2386410636","https://openalex.org/W3038962357","https://openalex.org/W2025663273","https://openalex.org/W3099153698","https://openalex.org/W2338801089"],"abstract_inverted_index":{"Satisfying":[0],"safety":[1,91],"constraints":[2],"in":[3,12,135],"reinforcement":[4],"learning":[5],"(RL)":[6],"is":[7,120,161,177],"an":[8,107],"important":[9],"issue,":[10],"especially":[11],"real-world":[13,138],"applications.":[14],"Many":[15],"studies":[16],"have":[17,102],"approached":[18],"safe":[19,80,151],"RL":[20,81],"with":[21,34,144,169,183],"the":[22,35,47,51,62,79,112,117,125,132,145,157,170,175,197],"Lagrangian":[23],"method,":[24],"which":[25,54,101],"introduces":[26],"dual":[27,37,52,73,104,158,194],"variables.":[28],"However,":[29],"applying":[30],"a":[31,40,56,70,86,95,103,192],"trained":[32],"policy":[33,88,97,119,128,142],"optimal":[36,48,127,164],"variable":[38,74,105,195],"to":[39,122,124,152,156,166,179,181],"new":[41,71],"environment":[42],"can":[43],"be":[44],"hazardous":[45],"since":[46],"value":[49],"of":[50,58],"variable,":[53],"represents":[55],"level":[57],"safety,":[59],"depends":[60],"on":[61],"environmental":[63],"setting.":[64],"To":[65],"this":[66],"end,":[67],"we":[68,110],"propose":[69],"framework,":[72],"actor-critic":[75,114],"(DVAC),":[76],"that":[77,116],"solves":[78],"problem":[82],"by":[83,190],"simultaneously":[84],"training":[85,189],"single":[87],"over":[89],"different":[90],"levels.":[92],"We":[93,130],"introduce":[94],"universal":[96,99,118,141],"and":[98,137,160],"Q-function,":[100],"as":[106],"argument.":[108],"Then,":[109],"extend":[111],"soft":[113],"so":[115],"guaranteed":[121],"converge":[123],"Pareto":[126,163],"sets.":[129],"evaluate":[131],"proposed":[133,146,198],"method":[134,147],"simulation":[136],"environments.":[139],"The":[140],"learned":[143,168],"ranges":[148],"from":[149],"extremely":[150],"high":[153],"performance":[154],"according":[155],"variables,":[159],"nearly":[162],"compared":[165],"policies":[167],"baseline":[171],"methods.":[172],"In":[173],"addition,":[174],"agent":[176],"able":[178],"adapt":[180],"environments":[182],"unseen":[184],"state":[185],"distributions":[186],"without":[187],"additional":[188],"identifying":[191],"suitable":[193],"using":[196],"method.":[199]},"counts_by_year":[{"year":2024,"cited_by_count":1}],"updated_date":"2025-12-25T23:11:45.687758","created_date":"2025-10-10T00:00:00"}
