{"id":"https://openalex.org/W7127387172","doi":"https://doi.org/10.48550/arxiv.2602.00587","title":"Safe Langevin Soft Actor Critic","display_name":"Safe Langevin Soft Actor Critic","publication_year":2026,"publication_date":"2026-01-31","ids":{"openalex":"https://openalex.org/W7127387172","doi":"https://doi.org/10.48550/arxiv.2602.00587"},"language":null,"primary_location":{"id":"pmh:doi:10.48550/arxiv.2602.00587","is_oa":true,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S4406922384","display_name":"Open MIND","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"publisher-specific-oa","license_id":"https://openalex.org/licenses/publisher-specific-oa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":null,"any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5121079551","display_name":"Mahesh Keswani","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Keswani, Mahesh","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101608990","display_name":"Samyak Jain","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Jain, Samyak","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5017330847","display_name":"Raunak Bhattacharyya","orcid":"https://orcid.org/0000-0001-9254-8417"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Bhattacharyya, Raunak P.","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5121079551"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.576200008392334,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.576200008392334,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11689","display_name":"Adversarial Robustness in Machine Learning","score":0.15189999341964722,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11612","display_name":"Stochastic Gradient Optimization Techniques","score":0.11050000041723251,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/cvar","display_name":"CVAR","score":0.930400013923645},{"id":"https://openalex.org/keywords/langevin-dynamics","display_name":"Langevin dynamics","score":0.6621999740600586},{"id":"https://openalex.org/keywords/constraint","display_name":"Constraint (computer-aided design)","score":0.5838000178337097},{"id":"https://openalex.org/keywords/maxima-and-minima","display_name":"Maxima and minima","score":0.5286999940872192},{"id":"https://openalex.org/keywords/key","display_name":"Key (lock)","score":0.4733000099658966},{"id":"https://openalex.org/keywords/lagrangian-relaxation","display_name":"Lagrangian relaxation","score":0.4009999930858612},{"id":"https://openalex.org/keywords/generalization","display_name":"Generalization","score":0.39480000734329224},{"id":"https://openalex.org/keywords/markov-decision-process","display_name":"Markov decision process","score":0.3668000102043152},{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.3431999981403351}],"concepts":[{"id":"https://openalex.org/C2779922397","wikidata":"https://www.wikidata.org/wiki/Q5014755","display_name":"CVAR","level":4,"score":0.930400013923645},{"id":"https://openalex.org/C2780004032","wikidata":"https://www.wikidata.org/wiki/Q6485978","display_name":"Langevin dynamics","level":2,"score":0.6621999740600586},{"id":"https://openalex.org/C126255220","wikidata":"https://www.wikidata.org/wiki/Q141495","display_name":"Mathematical optimization","level":1,"score":0.5909000039100647},{"id":"https://openalex.org/C2776036281","wikidata":"https://www.wikidata.org/wiki/Q48769818","display_name":"Constraint (computer-aided design)","level":2,"score":0.5838000178337097},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.5296000242233276},{"id":"https://openalex.org/C186633575","wikidata":"https://www.wikidata.org/wiki/Q845060","display_name":"Maxima and minima","level":2,"score":0.5286999940872192},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.4733000099658966},{"id":"https://openalex.org/C91765299","wikidata":"https://www.wikidata.org/wiki/Q3424292","display_name":"Lagrangian relaxation","level":2,"score":0.4009999930858612},{"id":"https://openalex.org/C177148314","wikidata":"https://www.wikidata.org/wiki/Q170084","display_name":"Generalization","level":2,"score":0.39480000734329224},{"id":"https://openalex.org/C106189395","wikidata":"https://www.wikidata.org/wiki/Q176789","display_name":"Markov decision process","level":3,"score":0.3668000102043152},{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.3431999981403351},{"id":"https://openalex.org/C118671147","wikidata":"https://www.wikidata.org/wiki/Q578714","display_name":"Quantile","level":2,"score":0.34290000796318054},{"id":"https://openalex.org/C37404715","wikidata":"https://www.wikidata.org/wiki/Q380679","display_name":"Dynamic programming","level":2,"score":0.3418999910354614},{"id":"https://openalex.org/C73684929","wikidata":"https://www.wikidata.org/wiki/Q598870","display_name":"Lagrange multiplier","level":2,"score":0.3359000086784363},{"id":"https://openalex.org/C129844170","wikidata":"https://www.wikidata.org/wiki/Q41299","display_name":"Quadratic equation","level":2,"score":0.31779998540878296},{"id":"https://openalex.org/C5496284","wikidata":"https://www.wikidata.org/wiki/Q5420856","display_name":"Expected shortfall","level":3,"score":0.3043000102043152},{"id":"https://openalex.org/C81845259","wikidata":"https://www.wikidata.org/wiki/Q290117","display_name":"Quadratic programming","level":2,"score":0.2980000078678131},{"id":"https://openalex.org/C122123141","wikidata":"https://www.wikidata.org/wiki/Q176623","display_name":"Random variable","level":2,"score":0.28780001401901245},{"id":"https://openalex.org/C8272713","wikidata":"https://www.wikidata.org/wiki/Q176737","display_name":"Stochastic process","level":2,"score":0.28760001063346863},{"id":"https://openalex.org/C2778068216","wikidata":"https://www.wikidata.org/wiki/Q55019500","display_name":"Malice","level":2,"score":0.28600001335144043},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.28209999203681946},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.27379998564720154},{"id":"https://openalex.org/C153258448","wikidata":"https://www.wikidata.org/wiki/Q1199743","display_name":"Gradient descent","level":3,"score":0.2700999975204468},{"id":"https://openalex.org/C185429906","wikidata":"https://www.wikidata.org/wiki/Q1130160","display_name":"Estimator","level":2,"score":0.262800008058548},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.26030001044273376},{"id":"https://openalex.org/C14646407","wikidata":"https://www.wikidata.org/wiki/Q1430750","display_name":"Bellman equation","level":2,"score":0.2572999894618988},{"id":"https://openalex.org/C2776029896","wikidata":"https://www.wikidata.org/wiki/Q3935810","display_name":"Relaxation (psychology)","level":2,"score":0.2565999925136566},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.25440001487731934}],"mesh":[],"locations_count":2,"locations":[{"id":"pmh:doi:10.48550/arxiv.2602.00587","is_oa":true,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S4406922384","display_name":"Open MIND","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"publisher-specific-oa","license_id":"https://openalex.org/licenses/publisher-specific-oa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Article"},{"id":"doi:10.48550/arxiv.2602.00587","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2602.00587","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"pmh:doi:10.48550/arxiv.2602.00587","is_oa":true,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S4406922384","display_name":"Open MIND","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"publisher-specific-oa","license_id":"https://openalex.org/licenses/publisher-specific-oa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Article"},"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/1","display_name":"No poverty","score":0.6907132863998413}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Balancing":[0],"reward":[1,60],"and":[2,18,42,65,87,114],"safety":[3],"in":[4,136,151],"constrained":[5],"reinforcement":[6],"learning":[7],"remains":[8],"challenging":[9],"due":[10],"to":[11,155],"poor":[12,68],"generalization":[13],"from":[14,67],"sharp":[15],"value":[16],"minima":[17],"inadequate":[19],"handling":[20],"of":[21,103,139,149],"heavy-tailed":[22],"risk":[23,44],"distribution.":[24],"We":[25,106],"introduce":[26],"Safe":[27],"Langevin":[28,56],"Soft":[29],"Actor-Critic":[30],"(SL-SAC),":[31],"a":[32,89],"principled":[33],"algorithm":[34],"that":[35,94,116],"addresses":[36],"both":[37],"issues":[38],"through":[39],"parameter-space":[40],"exploration":[41],"distributional":[43,71],"control.":[45],"Our":[46],"approach":[47],"combines":[48],"three":[49],"key":[50],"mechanisms:":[51],"(1)":[52],"Adaptive":[53],"Stochastic":[54],"Gradient":[55],"Dynamics":[57],"(aSGLD)":[58],"for":[59,84],"critics,":[61],"promoting":[62],"ensemble":[63],"diversity":[64],"escape":[66],"optima;":[69],"(2)":[70],"cost":[72,135,147],"estimation":[73,112],"via":[74],"Implicit":[75],"Quantile":[76],"Networks":[77],"(IQN)":[78],"with":[79,146],"Conditional":[80],"Value-at-Risk":[81],"(CVaR)":[82],"optimization":[83],"tail-risk":[85],"mitigation;":[86],"(3)":[88],"reactive":[90],"Lagrangian":[91],"relaxation":[92],"scheme":[93],"adapts":[95],"constraint":[96,122],"enforcement":[97],"based":[98],"on":[99,110],"the":[100,133],"empirical":[101],"CVaR":[102,111],"episodic":[104],"costs.":[105],"provide":[107],"theoretical":[108],"guarantees":[109],"error":[113],"demonstrate":[115],"CVaR-based":[117],"Lagrange":[118],"updates":[119],"yield":[120],"stronger":[121],"violation":[123],"signals":[124],"than":[125],"expected-cost":[126],"updates.":[127],"On":[128],"Safety-Gymnasium":[129],"benchmarks,":[130],"SL-SAC":[131],"achieves":[132],"lowest":[134],"7":[137],"out":[138],"10":[140],"tasks":[141,153],"while":[142],"maintaining":[143],"competitive":[144],"returns,":[145],"reductions":[148],"19-63%":[150],"velocity":[152],"compared":[154],"state-of-the-art":[156],"baselines.":[157]},"counts_by_year":[],"updated_date":"2026-04-04T16:13:02.066488","created_date":"2026-02-04T00:00:00"}
