{"id":"https://openalex.org/W4312790244","doi":"https://doi.org/10.1109/tsmc.2022.3213750","title":"Off-Policy Risk-Sensitive Reinforcement Learning-Based Constrained Robust Optimal Control","display_name":"Off-Policy Risk-Sensitive Reinforcement Learning-Based Constrained Robust Optimal Control","publication_year":2022,"publication_date":"2022-11-03","ids":{"openalex":"https://openalex.org/W4312790244","doi":"https://doi.org/10.1109/tsmc.2022.3213750"},"language":"en","primary_location":{"id":"doi:10.1109/tsmc.2022.3213750","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tsmc.2022.3213750","pdf_url":null,"source":{"id":"https://openalex.org/S4210209078","display_name":"IEEE Transactions on Systems Man and Cybernetics Systems","issn_l":"2168-2216","issn":["2168-2216","2168-2232"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Systems, Man, and Cybernetics: Systems","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5100331572","display_name":"Cong Li","orcid":"https://orcid.org/0000-0002-1103-4818"},"institutions":[{"id":"https://openalex.org/I62916508","display_name":"Technical University of Munich","ror":"https://ror.org/02kkvpp62","country_code":"DE","type":"education","lineage":["https://openalex.org/I62916508"]}],"countries":["DE"],"is_corresponding":true,"raw_author_name":"Cong Li","raw_affiliation_strings":["Chair of Automatic Control Engineering, Technical University of Munich, Munich, Germany"],"affiliations":[{"raw_affiliation_string":"Chair of Automatic Control Engineering, Technical University of Munich, Munich, Germany","institution_ids":["https://openalex.org/I62916508"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5054350814","display_name":"Qingchen Liu","orcid":"https://orcid.org/0000-0002-5892-3591"},"institutions":[{"id":"https://openalex.org/I126520041","display_name":"University of Science and Technology of China","ror":"https://ror.org/04c4dkn09","country_code":"CN","type":"education","lineage":["https://openalex.org/I126520041","https://openalex.org/I19820366"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Qingchen Liu","raw_affiliation_strings":["Department of Automation, University of Science and Technology of China, Hefei, China"],"affiliations":[{"raw_affiliation_string":"Department of Automation, University of Science and Technology of China, Hefei, China","institution_ids":["https://openalex.org/I126520041"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5026971084","display_name":"Zhehua Zhou","orcid":"https://orcid.org/0000-0001-9542-4858"},"institutions":[{"id":"https://openalex.org/I62916508","display_name":"Technical University of Munich","ror":"https://ror.org/02kkvpp62","country_code":"DE","type":"education","lineage":["https://openalex.org/I62916508"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Zhehua Zhou","raw_affiliation_strings":["Chair of Automatic Control Engineering, Technical University of Munich, Munich, Germany"],"affiliations":[{"raw_affiliation_string":"Chair of Automatic Control Engineering, Technical University of Munich, Munich, Germany","institution_ids":["https://openalex.org/I62916508"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5081223790","display_name":"Martin Buss","orcid":"https://orcid.org/0000-0002-1776-2752"},"institutions":[{"id":"https://openalex.org/I62916508","display_name":"Technical University of Munich","ror":"https://ror.org/02kkvpp62","country_code":"DE","type":"education","lineage":["https://openalex.org/I62916508"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Martin Buss","raw_affiliation_strings":["Chair of Automatic Control Engineering, Technical University of Munich, Munich, Germany"],"affiliations":[{"raw_affiliation_string":"Chair of Automatic Control Engineering, Technical University of Munich, Munich, Germany","institution_ids":["https://openalex.org/I62916508"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5100683476","display_name":"Fangzhou Liu","orcid":"https://orcid.org/0000-0002-1275-4809"},"institutions":[{"id":"https://openalex.org/I204983213","display_name":"Harbin Institute of Technology","ror":"https://ror.org/01yqg2h08","country_code":"CN","type":"education","lineage":["https://openalex.org/I204983213"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Fangzhou Liu","raw_affiliation_strings":["Research Center of Intelligent Control and Systems, Harbin Institute of Technology, Harbin, China"],"affiliations":[{"raw_affiliation_string":"Research Center of Intelligent Control and Systems, Harbin Institute of Technology, Harbin, China","institution_ids":["https://openalex.org/I204983213"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5100331572"],"corresponding_institution_ids":["https://openalex.org/I62916508"],"apc_list":null,"apc_paid":null,"fwci":1.9544,"has_fulltext":false,"cited_by_count":14,"citation_normalized_percentile":{"value":0.88211843,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":96,"max":99},"biblio":{"volume":"53","issue":"4","first_page":"2478","last_page":"2491"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12794","display_name":"Adaptive Dynamic Programming Control","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1703","display_name":"Computational Theory and Mathematics"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T12794","display_name":"Adaptive Dynamic Programming Control","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1703","display_name":"Computational Theory and Mathematics"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9883000254631042,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10675","display_name":"Mechanical Circulatory Support Devices","score":0.9613999724388123,"subfield":{"id":"https://openalex.org/subfields/2204","display_name":"Biomedical Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.854641318321228},{"id":"https://openalex.org/keywords/convergence","display_name":"Convergence (economics)","score":0.6719174385070801},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.655079185962677},{"id":"https://openalex.org/keywords/mathematical-optimization","display_name":"Mathematical optimization","score":0.5542223453521729},{"id":"https://openalex.org/keywords/constraint","display_name":"Constraint (computer-aided design)","score":0.514633297920227},{"id":"https://openalex.org/keywords/optimal-control","display_name":"Optimal control","score":0.5070197582244873},{"id":"https://openalex.org/keywords/bellman-equation","display_name":"Bellman equation","score":0.506131112575531},{"id":"https://openalex.org/keywords/artificial-neural-network","display_name":"Artificial neural network","score":0.5003542900085449},{"id":"https://openalex.org/keywords/function","display_name":"Function (biology)","score":0.48465949296951294},{"id":"https://openalex.org/keywords/stability","display_name":"Stability (learning theory)","score":0.4480932354927063},{"id":"https://openalex.org/keywords/q-learning","display_name":"Q-learning","score":0.4466102123260498},{"id":"https://openalex.org/keywords/control","display_name":"Control (management)","score":0.441829651594162},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.438278466463089},{"id":"https://openalex.org/keywords/state","display_name":"State (computer science)","score":0.43706005811691284},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.2935197651386261},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.23996347188949585},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.19336235523223877},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.17611172795295715},{"id":"https://openalex.org/keywords/engineering","display_name":"Engineering","score":0.14580431580543518}],"concepts":[{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.854641318321228},{"id":"https://openalex.org/C2777303404","wikidata":"https://www.wikidata.org/wiki/Q759757","display_name":"Convergence (economics)","level":2,"score":0.6719174385070801},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.655079185962677},{"id":"https://openalex.org/C126255220","wikidata":"https://www.wikidata.org/wiki/Q141495","display_name":"Mathematical optimization","level":1,"score":0.5542223453521729},{"id":"https://openalex.org/C2776036281","wikidata":"https://www.wikidata.org/wiki/Q48769818","display_name":"Constraint (computer-aided design)","level":2,"score":0.514633297920227},{"id":"https://openalex.org/C91575142","wikidata":"https://www.wikidata.org/wiki/Q1971426","display_name":"Optimal control","level":2,"score":0.5070197582244873},{"id":"https://openalex.org/C14646407","wikidata":"https://www.wikidata.org/wiki/Q1430750","display_name":"Bellman equation","level":2,"score":0.506131112575531},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.5003542900085449},{"id":"https://openalex.org/C14036430","wikidata":"https://www.wikidata.org/wiki/Q3736076","display_name":"Function (biology)","level":2,"score":0.48465949296951294},{"id":"https://openalex.org/C112972136","wikidata":"https://www.wikidata.org/wiki/Q7595718","display_name":"Stability (learning theory)","level":2,"score":0.4480932354927063},{"id":"https://openalex.org/C188116033","wikidata":"https://www.wikidata.org/wiki/Q2664563","display_name":"Q-learning","level":3,"score":0.4466102123260498},{"id":"https://openalex.org/C2775924081","wikidata":"https://www.wikidata.org/wiki/Q55608371","display_name":"Control (management)","level":2,"score":0.441829651594162},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.438278466463089},{"id":"https://openalex.org/C48103436","wikidata":"https://www.wikidata.org/wiki/Q599031","display_name":"State (computer science)","level":2,"score":0.43706005811691284},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.2935197651386261},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.23996347188949585},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.19336235523223877},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.17611172795295715},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.14580431580543518},{"id":"https://openalex.org/C2524010","wikidata":"https://www.wikidata.org/wiki/Q8087","display_name":"Geometry","level":1,"score":0.0},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0},{"id":"https://openalex.org/C78458016","wikidata":"https://www.wikidata.org/wiki/Q840400","display_name":"Evolutionary biology","level":1,"score":0.0},{"id":"https://openalex.org/C201995342","wikidata":"https://www.wikidata.org/wiki/Q682496","display_name":"Systems engineering","level":1,"score":0.0},{"id":"https://openalex.org/C50522688","wikidata":"https://www.wikidata.org/wiki/Q189833","display_name":"Economic growth","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/tsmc.2022.3213750","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tsmc.2022.3213750","pdf_url":null,"source":{"id":"https://openalex.org/S4210209078","display_name":"IEEE Transactions on Systems Man and Cybernetics Systems","issn_l":"2168-2216","issn":["2168-2216","2168-2232"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Systems, Man, and Cybernetics: Systems","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/16","display_name":"Peace, Justice and strong institutions","score":0.8100000023841858}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":39,"referenced_works":["https://openalex.org/W5108103","https://openalex.org/W234738007","https://openalex.org/W404241533","https://openalex.org/W612528811","https://openalex.org/W648594167","https://openalex.org/W1461376240","https://openalex.org/W1487127700","https://openalex.org/W1983523797","https://openalex.org/W2048687352","https://openalex.org/W2062373349","https://openalex.org/W2104843094","https://openalex.org/W2107431923","https://openalex.org/W2108286682","https://openalex.org/W2154185762","https://openalex.org/W2157577161","https://openalex.org/W2165726932","https://openalex.org/W2167757582","https://openalex.org/W2201581102","https://openalex.org/W2794019208","https://openalex.org/W2884094885","https://openalex.org/W2901363724","https://openalex.org/W2963148914","https://openalex.org/W2963868336","https://openalex.org/W2982646820","https://openalex.org/W2996568167","https://openalex.org/W3021669587","https://openalex.org/W3104184956","https://openalex.org/W3114263192","https://openalex.org/W3117706845","https://openalex.org/W3127275112","https://openalex.org/W3128626283","https://openalex.org/W3137245548","https://openalex.org/W3142915818","https://openalex.org/W3172957127","https://openalex.org/W3177033787","https://openalex.org/W4283727098","https://openalex.org/W6687681856","https://openalex.org/W6780404908","https://openalex.org/W6787470483"],"related_works":["https://openalex.org/W2386410636","https://openalex.org/W3038962357","https://openalex.org/W3096874164","https://openalex.org/W2166117066","https://openalex.org/W2357975469","https://openalex.org/W2136202932","https://openalex.org/W3099153698","https://openalex.org/W1859185863","https://openalex.org/W3087814763","https://openalex.org/W2025663273"],"abstract_inverted_index":{"This":[0],"article":[1],"proposes":[2],"an":[3,51,58],"off-policy":[4,59,101],"risk-sensitive":[5,34],"reinforcement":[6],"learning":[7,76],"(RL)-based":[8],"control":[9,32,55,82,161],"framework":[10],"to":[11,42,64,69,86,99,122,127,132],"jointly":[12],"optimize":[13],"the":[14,30,44,66,70,75,78,100,106,110,139,156,159],"task":[15],"performance":[16],"and":[17,33,36,90,117,147],"constraint":[18],"satisfaction":[19],"in":[20],"a":[21,134],"disturbed":[22],"environment.":[23],"The":[24,142,152],"risk-aware":[25,71],"value":[26,72],"function,":[27],"constructed":[28],"using":[29],"pseudo":[31],"input":[35,89],"state":[37,91],"penalty":[38],"terms,":[39],"is":[40,62,84,113],"introduced":[41],"convert":[43],"original":[45],"constrained":[46],"robust":[47],"stabilization":[48],"problem":[49],"into":[50],"equivalent":[52],"unconstrained":[53],"optimal":[54,81],"problem.":[56],"Then,":[57],"RL":[60],"algorithm":[61],"developed":[63,121],"learn":[65],"approximate":[67,80],"solution":[68],"function.":[73],"During":[74],"process,":[77],"associated":[79],"policy":[83],"able":[85],"satisfy":[87],"both":[88],"constraints":[92],"under":[93],"disturbances.":[94],"By":[95],"replaying":[96],"experience":[97,130],"data":[98,131],"weight":[102,111,140,148],"update":[103],"law":[104],"of":[105,144,158],"critic":[107],"neural":[108],"network,":[109],"convergence":[112,149],"guaranteed.":[114],"Moreover,":[115],"online":[116],"offline":[118],"algorithms":[119],"are":[120,150],"serve":[123],"as":[124],"principled":[125],"ways":[126],"record":[128],"informative":[129],"achieve":[133],"sufficient":[135],"excitation":[136],"required":[137],"for":[138],"convergence.":[141],"proofs":[143],"system":[145],"stability":[146],"provided.":[151],"Simulation":[153],"results":[154],"reveal":[155],"validity":[157],"proposed":[160],"framework.":[162]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":4},{"year":2024,"cited_by_count":6},{"year":2023,"cited_by_count":3}],"updated_date":"2026-03-09T08:58:05.943551","created_date":"2025-10-10T00:00:00"}
