{"id":"https://openalex.org/W4405784969","doi":"https://doi.org/10.1109/iros58592.2024.10802547","title":"Meta SAC-Lag: Towards Deployable Safe Reinforcement Learning via MetaGradient-based Hyperparameter Tuning","display_name":"Meta SAC-Lag: Towards Deployable Safe Reinforcement Learning via MetaGradient-based Hyperparameter Tuning","publication_year":2024,"publication_date":"2024-10-14","ids":{"openalex":"https://openalex.org/W4405784969","doi":"https://doi.org/10.1109/iros58592.2024.10802547"},"language":"en","primary_location":{"id":"doi:10.1109/iros58592.2024.10802547","is_oa":false,"landing_page_url":"https://doi.org/10.1109/iros58592.2024.10802547","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2024 IEEE/RSJ International Conference on Intelligent Robots and Systems (IROS)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5050217922","display_name":"Homayoun Honari","orcid":null},"institutions":[{"id":"https://openalex.org/I212119943","display_name":"University of Victoria","ror":"https://ror.org/04s5mat29","country_code":"CA","type":"education","lineage":["https://openalex.org/I212119943"]}],"countries":["CA"],"is_corresponding":false,"raw_author_name":"Homayoun Honari","raw_affiliation_strings":["University of Victoria,Department of Mechanical Engineering,Victoria,Canada"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"University of Victoria,Department of Mechanical Engineering,Victoria,Canada","institution_ids":["https://openalex.org/I212119943"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5038499955","display_name":"Amir M. Soufi Enayati","orcid":"https://orcid.org/0000-0002-6736-8016"},"institutions":[{"id":"https://openalex.org/I212119943","display_name":"University of Victoria","ror":"https://ror.org/04s5mat29","country_code":"CA","type":"education","lineage":["https://openalex.org/I212119943"]}],"countries":["CA"],"is_corresponding":false,"raw_author_name":"Amir M. Soufi Enayati","raw_affiliation_strings":["University of Victoria,Department of Mechanical Engineering,Victoria,Canada"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"University of Victoria,Department of Mechanical Engineering,Victoria,Canada","institution_ids":["https://openalex.org/I212119943"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5021859498","display_name":"Mehran Ghafarian Tamizi","orcid":"https://orcid.org/0000-0002-8495-0374"},"institutions":[{"id":"https://openalex.org/I212119943","display_name":"University of Victoria","ror":"https://ror.org/04s5mat29","country_code":"CA","type":"education","lineage":["https://openalex.org/I212119943"]}],"countries":["CA"],"is_corresponding":false,"raw_author_name":"Mehran Ghafarian Tamizi","raw_affiliation_strings":["University of Victoria,Department of Electrical and Computer Engineering,Victoria,Canada"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"University of Victoria,Department of Electrical and Computer Engineering,Victoria,Canada","institution_ids":["https://openalex.org/I212119943"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5058540009","display_name":"Homayoun Najjaran","orcid":"https://orcid.org/0000-0002-3550-225X"},"institutions":[{"id":"https://openalex.org/I212119943","display_name":"University of Victoria","ror":"https://ror.org/04s5mat29","country_code":"CA","type":"education","lineage":["https://openalex.org/I212119943"]}],"countries":["CA"],"is_corresponding":false,"raw_author_name":"Homayoun Najjaran","raw_affiliation_strings":["University of Victoria,Department of Mechanical Engineering,Victoria,Canada"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"University of Victoria,Department of Mechanical Engineering,Victoria,Canada","institution_ids":["https://openalex.org/I212119943"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":1,"corresponding_author_ids":[],"corresponding_institution_ids":["https://openalex.org/I212119943"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.20529464,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"619","last_page":"626"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11512","display_name":"Anomaly Detection Techniques and Applications","score":0.9887999892234802,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11512","display_name":"Anomaly Detection Techniques and Applications","score":0.9887999892234802,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11241","display_name":"Advanced Malware Detection Techniques","score":0.9596999883651733,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11689","display_name":"Adversarial Robustness in Machine Learning","score":0.944599986076355,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/hyperparameter","display_name":"Hyperparameter","score":0.9245063066482544},{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.8004332780838013},{"id":"https://openalex.org/keywords/lag","display_name":"Lag","score":0.6143252849578857},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.5977500677108765},{"id":"https://openalex.org/keywords/meta-learning","display_name":"Meta learning (computer science)","score":0.5119827389717102},{"id":"https://openalex.org/keywords/reinforcement","display_name":"Reinforcement","score":0.47756892442703247},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.38879525661468506},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.3390326499938965},{"id":"https://openalex.org/keywords/engineering","display_name":"Engineering","score":0.1481059193611145},{"id":"https://openalex.org/keywords/operating-system","display_name":"Operating system","score":0.1257157027721405},{"id":"https://openalex.org/keywords/structural-engineering","display_name":"Structural engineering","score":0.05788317322731018}],"concepts":[{"id":"https://openalex.org/C8642999","wikidata":"https://www.wikidata.org/wiki/Q4171168","display_name":"Hyperparameter","level":2,"score":0.9245063066482544},{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.8004332780838013},{"id":"https://openalex.org/C75778745","wikidata":"https://www.wikidata.org/wiki/Q342626","display_name":"Lag","level":2,"score":0.6143252849578857},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.5977500677108765},{"id":"https://openalex.org/C2781002164","wikidata":"https://www.wikidata.org/wiki/Q6822311","display_name":"Meta learning (computer science)","level":3,"score":0.5119827389717102},{"id":"https://openalex.org/C67203356","wikidata":"https://www.wikidata.org/wiki/Q1321905","display_name":"Reinforcement","level":2,"score":0.47756892442703247},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.38879525661468506},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.3390326499938965},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.1481059193611145},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.1257157027721405},{"id":"https://openalex.org/C66938386","wikidata":"https://www.wikidata.org/wiki/Q633538","display_name":"Structural engineering","level":1,"score":0.05788317322731018},{"id":"https://openalex.org/C201995342","wikidata":"https://www.wikidata.org/wiki/Q682496","display_name":"Systems engineering","level":1,"score":0.0},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/iros58592.2024.10802547","is_oa":false,"landing_page_url":"https://doi.org/10.1109/iros58592.2024.10802547","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2024 IEEE/RSJ International Conference on Intelligent Robots and Systems (IROS)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[{"id":"https://openalex.org/F4320322675","display_name":"Mitacs","ror":"https://ror.org/00cjrc276"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":37,"referenced_works":["https://openalex.org/W2081348312","https://openalex.org/W2158782408","https://openalex.org/W2169206416","https://openalex.org/W2907537824","https://openalex.org/W2990747716","https://openalex.org/W3121342653","https://openalex.org/W3150718622","https://openalex.org/W3195968524","https://openalex.org/W3203657119","https://openalex.org/W4206497039","https://openalex.org/W4385299175","https://openalex.org/W4392182143","https://openalex.org/W4402402057","https://openalex.org/W6682367392","https://openalex.org/W6684921986","https://openalex.org/W6727249380","https://openalex.org/W6736057607","https://openalex.org/W6738483526","https://openalex.org/W6743812932","https://openalex.org/W6747473740","https://openalex.org/W6751535212","https://openalex.org/W6751725685","https://openalex.org/W6751797489","https://openalex.org/W6751869817","https://openalex.org/W6755437240","https://openalex.org/W6757592117","https://openalex.org/W6771787661","https://openalex.org/W6780117753","https://openalex.org/W6780587392","https://openalex.org/W6780608970","https://openalex.org/W6782766965","https://openalex.org/W6784293744","https://openalex.org/W6784643869","https://openalex.org/W6787651164","https://openalex.org/W6790829992","https://openalex.org/W6803800456","https://openalex.org/W6858013149"],"related_works":["https://openalex.org/W4281847915","https://openalex.org/W3000635674","https://openalex.org/W3097663225","https://openalex.org/W98577079","https://openalex.org/W4301772239","https://openalex.org/W2948164010","https://openalex.org/W4297800546","https://openalex.org/W4294873804","https://openalex.org/W2804529069","https://openalex.org/W4383109125"],"abstract_inverted_index":{"Safe":[0],"Reinforcement":[1],"Learning":[2],"(Safe":[3],"RL)":[4],"is":[5,29,58,108,224,244],"one":[6],"of":[7,12,64,169,177,237,253,262],"the":[8,16,27,47,62,102,125,131,135,140,147,154,159,164,170,175,188,229,242,259],"prevalently":[9],"studied":[10],"subcategories":[11],"trial-and-error-based":[13],"methods":[14],"with":[15,117,214],"intention":[17],"to":[18,30,61,72,99,110,163,193,227,246,257],"be":[19,247],"deployed":[20],"on":[21,42,146],"real-world":[22,56,207,264],"systems.":[23],"In":[24,122],"safe":[25,53,112,254],"RL,":[26],"goal":[28],"maximize":[31],"reward":[32],"performance":[33,161,176],"while":[34,231],"minimizing":[35,232],"constraints,":[36],"often":[37],"achieved":[38],"by":[39],"setting":[40],"bounds":[41],"constraint":[43],"functions":[44],"and":[45,114,134,187],"utilizing":[46],"Lagrangian":[48,91,185],"method.":[49],"However,":[50],"deploying":[51],"Lagrangian-based":[52,84],"RL":[54,255],"in":[55,180,240],"scenarios":[57],"challenging":[59],"due":[60,162],"necessity":[63],"threshold":[65,115],"fine-tuning,":[66],"as":[67],"imprecise":[68],"adjustments":[69],"may":[70],"lead":[71],"suboptimal":[73],"policy":[74],"convergence.":[75],"To":[76],"mitigate":[77],"this":[78],"challenge,":[79],"we":[80,204],"propose":[81],"a":[82,206,210,218,248],"unified":[83],"model-free":[85],"architecture":[86],"called":[87],"Meta":[88,94,178,222,238],"Soft":[89],"Actor-Critic":[90],"(Meta":[92],"SAC-Lag).":[93],"SAC-Lag":[95,179,223,239],"uses":[96],"meta-gradient":[97],"optimization":[98],"automatically":[100],"update":[101],"safety-related":[103],"hyperparameters.":[104],"The":[105,235],"proposed":[106],"method":[107],"designed":[109],"address":[111],"exploration":[113],"adjustment":[116],"minimal":[118],"hyperparameter":[119],"tuning":[120],"requirement.":[121],"our":[123],"pipeline,":[124],"inner":[126],"parameters":[127],"are":[128,137,143],"updated":[129,148],"through":[130],"conventional":[132],"formulation":[133],"hyperparameters":[136],"adjusted":[138],"using":[139],"meta-objectives":[141],"which":[142],"defined":[144],"based":[145],"parameters.":[149],"Our":[150],"results":[151,189],"show":[152],"that":[153],"agent":[155],"can":[156],"reliably":[157],"adjust":[158],"safety":[160,171],"relatively":[165],"fast":[166],"convergence":[167],"rate":[168],"threshold.":[172],"We":[173],"evaluate":[174],"five":[181],"simulated":[182],"environments":[183],"against":[184],"baselines,":[186],"demonstrate":[190],"its":[191],"capability":[192],"create":[194],"synergy":[195],"between":[196],"parameters,":[197],"yielding":[198],"better":[199],"or":[200],"competitive":[201],"results.":[202],"Furthermore,":[203],"conduct":[205],"experiment":[208,243],"involving":[209],"robotic":[211],"arm":[212],"tasked":[213],"pouring":[215],"coffee":[216],"into":[217],"cup":[219],"without":[220,266],"spillage.":[221],"successfully":[225],"trained":[226],"execute":[228],"task,":[230],"effort":[233],"constraints.":[234],"success":[236],"performing":[241],"intended":[245],"step":[249],"toward":[250],"practical":[251],"deployment":[252],"algorithms":[256],"learn":[258],"control":[260],"process":[261],"safety-critical":[263],"systems":[265],"explicit":[267],"engineering.":[268]},"counts_by_year":[],"updated_date":"2026-06-26T08:34:08.712188","created_date":"2025-10-10T00:00:00"}
