{"id":"https://openalex.org/W4417012230","doi":"https://doi.org/10.1109/tac.2025.3587111","title":"Policy Gradient Methods for the Cost-Constrained LQR: Strong Duality and Global Convergence","display_name":"Policy Gradient Methods for the Cost-Constrained LQR: Strong Duality and Global Convergence","publication_year":2025,"publication_date":"2025-07-08","ids":{"openalex":"https://openalex.org/W4417012230","doi":"https://doi.org/10.1109/tac.2025.3587111"},"language":"en","primary_location":{"id":"doi:10.1109/tac.2025.3587111","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tac.2025.3587111","pdf_url":null,"source":{"id":"https://openalex.org/S184954342","display_name":"IEEE Transactions on Automatic Control","issn_l":"0018-9286","issn":["0018-9286","1558-2523","2334-3303"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Automatic Control","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5026081309","display_name":"Feiran Zhao","orcid":"https://orcid.org/0000-0003-3142-2903"},"institutions":[{"id":"https://openalex.org/I4210156423","display_name":"National Engineering Research Center for Information Technology in Agriculture","ror":"https://ror.org/04c3j3t84","country_code":"CN","type":"facility","lineage":["https://openalex.org/I4210156423"]},{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Feiran Zhao","raw_affiliation_strings":["Department of Automation and Beijing National Research Center for Information Science and Technology, Tsinghua University, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Department of Automation and Beijing National Research Center for Information Science and Technology, Tsinghua University, Beijing, China","institution_ids":["https://openalex.org/I99065089","https://openalex.org/I4210156423"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5088962631","display_name":"Keyou You","orcid":"https://orcid.org/0000-0003-4355-5340"},"institutions":[{"id":"https://openalex.org/I4210156423","display_name":"National Engineering Research Center for Information Technology in Agriculture","ror":"https://ror.org/04c3j3t84","country_code":"CN","type":"facility","lineage":["https://openalex.org/I4210156423"]},{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Keyou You","raw_affiliation_strings":["Department of Automation and Beijing National Research Center for Information Science and Technology, Tsinghua University, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Department of Automation and Beijing National Research Center for Information Science and Technology, Tsinghua University, Beijing, China","institution_ids":["https://openalex.org/I99065089","https://openalex.org/I4210156423"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5026081309"],"corresponding_institution_ids":["https://openalex.org/I4210156423","https://openalex.org/I99065089"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.20185832,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":"70","issue":"12","first_page":"8414","last_page":"8421"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.4726000130176544,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.4726000130176544,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12794","display_name":"Adaptive Dynamic Programming Control","score":0.39419999718666077,"subfield":{"id":"https://openalex.org/subfields/1703","display_name":"Computational Theory and Mathematics"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10046","display_name":"Stability and Control of Uncertain Systems","score":0.022600000724196434,"subfield":{"id":"https://openalex.org/subfields/2207","display_name":"Control and Systems Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/lipschitz-continuity","display_name":"Lipschitz continuity","score":0.6987000107765198},{"id":"https://openalex.org/keywords/duality","display_name":"Duality (order theory)","score":0.6959999799728394},{"id":"https://openalex.org/keywords/constructive","display_name":"Constructive","score":0.6315000057220459},{"id":"https://openalex.org/keywords/convergence","display_name":"Convergence (economics)","score":0.5830000042915344},{"id":"https://openalex.org/keywords/optimal-control","display_name":"Optimal control","score":0.5310999751091003},{"id":"https://openalex.org/keywords/multiplier","display_name":"Multiplier (economics)","score":0.5200999975204468},{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.5131999850273132},{"id":"https://openalex.org/keywords/constructive-proof","display_name":"Constructive proof","score":0.5062000155448914}],"concepts":[{"id":"https://openalex.org/C22324862","wikidata":"https://www.wikidata.org/wiki/Q652707","display_name":"Lipschitz continuity","level":2,"score":0.6987000107765198},{"id":"https://openalex.org/C2778023678","wikidata":"https://www.wikidata.org/wiki/Q554403","display_name":"Duality (order theory)","level":2,"score":0.6959999799728394},{"id":"https://openalex.org/C2778701210","wikidata":"https://www.wikidata.org/wiki/Q28130034","display_name":"Constructive","level":3,"score":0.6315000057220459},{"id":"https://openalex.org/C2777303404","wikidata":"https://www.wikidata.org/wiki/Q759757","display_name":"Convergence (economics)","level":2,"score":0.5830000042915344},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.5601999759674072},{"id":"https://openalex.org/C91575142","wikidata":"https://www.wikidata.org/wiki/Q1971426","display_name":"Optimal control","level":2,"score":0.5310999751091003},{"id":"https://openalex.org/C124584101","wikidata":"https://www.wikidata.org/wiki/Q1053266","display_name":"Multiplier (economics)","level":2,"score":0.5200999975204468},{"id":"https://openalex.org/C126255220","wikidata":"https://www.wikidata.org/wiki/Q141495","display_name":"Mathematical optimization","level":1,"score":0.5142999887466431},{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.5131999850273132},{"id":"https://openalex.org/C202854965","wikidata":"https://www.wikidata.org/wiki/Q3044470","display_name":"Constructive proof","level":2,"score":0.5062000155448914},{"id":"https://openalex.org/C2780980858","wikidata":"https://www.wikidata.org/wiki/Q110022","display_name":"Dual (grammatical number)","level":2,"score":0.43720000982284546},{"id":"https://openalex.org/C28826006","wikidata":"https://www.wikidata.org/wiki/Q33521","display_name":"Applied mathematics","level":1,"score":0.4359000027179718},{"id":"https://openalex.org/C163863214","wikidata":"https://www.wikidata.org/wiki/Q7624553","display_name":"Strong duality","level":3,"score":0.4284000098705292},{"id":"https://openalex.org/C14036430","wikidata":"https://www.wikidata.org/wiki/Q3736076","display_name":"Function (biology)","level":2,"score":0.36489999294281006},{"id":"https://openalex.org/C48103436","wikidata":"https://www.wikidata.org/wiki/Q599031","display_name":"State (computer science)","level":2,"score":0.35249999165534973},{"id":"https://openalex.org/C115680565","wikidata":"https://www.wikidata.org/wiki/Q5977448","display_name":"Gradient method","level":2,"score":0.3517000079154968},{"id":"https://openalex.org/C527412718","wikidata":"https://www.wikidata.org/wiki/Q855395","display_name":"Interpretation (philosophy)","level":2,"score":0.3345000147819519},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.334199994802475},{"id":"https://openalex.org/C2775924081","wikidata":"https://www.wikidata.org/wiki/Q55608371","display_name":"Control (management)","level":2,"score":0.29660001397132874},{"id":"https://openalex.org/C47446073","wikidata":"https://www.wikidata.org/wiki/Q5165890","display_name":"Control theory (sociology)","level":3,"score":0.29589998722076416},{"id":"https://openalex.org/C5274546","wikidata":"https://www.wikidata.org/wiki/Q5310264","display_name":"Duality gap","level":3,"score":0.2696000039577484}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/tac.2025.3587111","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tac.2025.3587111","pdf_url":null,"source":{"id":"https://openalex.org/S184954342","display_name":"IEEE Transactions on Automatic Control","issn_l":"0018-9286","issn":["0018-9286","1558-2523","2334-3303"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Automatic Control","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G1850434087","display_name":null,"funder_award_id":"62033006","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G3518171487","display_name":null,"funder_award_id":"62325305","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":19,"referenced_works":["https://openalex.org/W315644267","https://openalex.org/W1144593952","https://openalex.org/W2020123437","https://openalex.org/W2030109435","https://openalex.org/W2076037605","https://openalex.org/W2126682207","https://openalex.org/W2513180554","https://openalex.org/W2885504492","https://openalex.org/W2963525569","https://openalex.org/W2967292964","https://openalex.org/W3118666236","https://openalex.org/W4206497039","https://openalex.org/W4226043618","https://openalex.org/W4238753042","https://openalex.org/W4250739957","https://openalex.org/W4313591116","https://openalex.org/W4399453180","https://openalex.org/W4402300769","https://openalex.org/W4410342707"],"related_works":[],"abstract_inverted_index":{"In":[0],"safety-critical":[1],"applications,":[2],"reinforcement":[3],"learning":[4],"(RL)":[5],"needs":[6],"to":[7,46,58,114],"consider":[8],"safety":[9],"constraints.":[10,47],"However,":[11],"theoretical":[12,117],"understandings":[13],"of":[14,37,68,85],"constrained":[15],"RL":[16],"for":[17,78,105],"continuous":[18],"control":[19],"are":[20,44],"largely":[21],"absent.":[22],"As":[23],"a":[24,30,35,53,75,82],"case":[25],"study,":[26],"this":[27],"paper":[28],"presents":[29],"cost-constrained":[31,70],"LQR":[32,38,71],"formulation,":[33],"where":[34],"number":[36],"costs":[39],"with":[40],"user-defined":[41],"penalty":[42],"matrices":[43],"subject":[45],"To":[48],"solve":[49],"it,":[50],"we":[51,73,100,111],"propose":[52],"policy":[54],"gradient":[55],"primal-dual":[56,108],"method":[57],"find":[59],"an":[60,86],"optimal":[61,87],"state":[62],"feedback":[63],"gain.":[64],"Despite":[65],"the":[66,69,93,106],"non-convexity":[67],"problem,":[72],"provide":[74,102],"constructive":[76],"proof":[77],"strong":[79],"duality":[80],"and":[81],"geometric":[83],"interpretation":[84],"multiplier":[88],"set.":[89],"By":[90],"proving":[91],"that":[92],"concave":[94],"dual":[95],"function":[96],"is":[97],"Lipschitz":[98],"smooth,":[99],"further":[101],"convergence":[103],"guarantees":[104],"PG":[107],"method.":[109],"Finally,":[110],"perform":[112],"simulations":[113],"validate":[115],"our":[116],"findings.":[118]},"counts_by_year":[],"updated_date":"2026-04-09T08:11:56.329763","created_date":"2025-10-10T00:00:00"}
