{"id":"https://openalex.org/W7138158283","doi":"https://doi.org/10.1609/aaai.v40i32.39978","title":"Proactive Constrained Policy Optimization with Preemptive Penalty","display_name":"Proactive Constrained Policy Optimization with Preemptive Penalty","publication_year":2026,"publication_date":"2026-03-14","ids":{"openalex":"https://openalex.org/W7138158283","doi":"https://doi.org/10.1609/aaai.v40i32.39978"},"language":null,"primary_location":{"id":"doi:10.1609/aaai.v40i32.39978","is_oa":true,"landing_page_url":"https://doi.org/10.1609/aaai.v40i32.39978","pdf_url":"https://ojs.aaai.org/index.php/AAAI/article/download/39978/43939","source":{"id":"https://openalex.org/S4210191458","display_name":"Proceedings of the AAAI Conference on Artificial Intelligence","issn_l":"2159-5399","issn":["2159-5399","2374-3468"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/P4310320058","host_organization_name":"Association for the Advancement of Artificial Intelligence","host_organization_lineage":["https://openalex.org/P4310320058"],"host_organization_lineage_names":["Association for the Advancement of Artificial Intelligence"],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the AAAI Conference on Artificial Intelligence","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"diamond","oa_url":"https://ojs.aaai.org/index.php/AAAI/article/download/39978/43939","any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5129736668","display_name":"Ning Yang","orcid":null},"institutions":[{"id":"https://openalex.org/I19820366","display_name":"Chinese Academy of Sciences","ror":"https://ror.org/034t30j35","country_code":"CN","type":"government","lineage":["https://openalex.org/I19820366"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Ning Yang","raw_affiliation_strings":["Institute of Automation, Chinese Academy of Sciences"],"affiliations":[{"raw_affiliation_string":"Institute of Automation, Chinese Academy of Sciences","institution_ids":["https://openalex.org/I19820366"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5129676587","display_name":"Pengyu Wang","orcid":null},"institutions":[{"id":"https://openalex.org/I4210116924","display_name":"Chinese University of Hong Kong, Shenzhen","ror":"https://ror.org/02d5ks197","country_code":"CN","type":"education","lineage":["https://openalex.org/I177725633","https://openalex.org/I180726961","https://openalex.org/I4210116924"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Pengyu Wang","raw_affiliation_strings":["Institute of Automation, Chinese Academy of Sciences\nSchool of Science and Engineering, The Chinese University of Hong Kong (Shenzhen), Longgang, Shenzhen, Guangdong, 518172, P.R. China"],"affiliations":[{"raw_affiliation_string":"Institute of Automation, Chinese Academy of Sciences\nSchool of Science and Engineering, The Chinese University of Hong Kong (Shenzhen), Longgang, Shenzhen, Guangdong, 518172, P.R. China","institution_ids":["https://openalex.org/I4210116924"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5129641938","display_name":"Guoqing Liu","orcid":null},"institutions":[{"id":"https://openalex.org/I4210164937","display_name":"Microsoft Research (United Kingdom)","ror":"https://ror.org/05k87vq12","country_code":"GB","type":"company","lineage":["https://openalex.org/I1290206253","https://openalex.org/I4210164937"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Guoqing Liu","raw_affiliation_strings":["Microsoft Research"],"affiliations":[{"raw_affiliation_string":"Microsoft Research","institution_ids":["https://openalex.org/I4210164937"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5129656271","display_name":"Haifeng Zhang","orcid":null},"institutions":[{"id":"https://openalex.org/I19820366","display_name":"Chinese Academy of Sciences","ror":"https://ror.org/034t30j35","country_code":"CN","type":"government","lineage":["https://openalex.org/I19820366"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Haifeng Zhang","raw_affiliation_strings":["Institute of Automation, Chinese Academy of Sciences"],"affiliations":[{"raw_affiliation_string":"Institute of Automation, Chinese Academy of Sciences","institution_ids":["https://openalex.org/I19820366"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5129649388","display_name":"Pin Lyu","orcid":null},"institutions":[{"id":"https://openalex.org/I19820366","display_name":"Chinese Academy of Sciences","ror":"https://ror.org/034t30j35","country_code":"CN","type":"government","lineage":["https://openalex.org/I19820366"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Pin Lyu","raw_affiliation_strings":["Institute of Automation, Chinese Academy of Sciences"],"affiliations":[{"raw_affiliation_string":"Institute of Automation, Chinese Academy of Sciences","institution_ids":["https://openalex.org/I19820366"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5129727263","display_name":"Jun Wang","orcid":null},"institutions":[{"id":"https://openalex.org/I45129253","display_name":"University College London","ror":"https://ror.org/02jx3x895","country_code":"GB","type":"education","lineage":["https://openalex.org/I124357947","https://openalex.org/I45129253"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Jun Wang","raw_affiliation_strings":["University College London"],"affiliations":[{"raw_affiliation_string":"University College London","institution_ids":["https://openalex.org/I45129253"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5129736668"],"corresponding_institution_ids":["https://openalex.org/I19820366"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":true,"cited_by_count":0,"citation_normalized_percentile":{"value":0.37932872,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":"40","issue":"32","first_page":"27583","last_page":"27591"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.8406999707221985,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.8406999707221985,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11689","display_name":"Adversarial Robustness in Machine Learning","score":0.041600000113248825,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12794","display_name":"Adaptive Dynamic Programming Control","score":0.021299999207258224,"subfield":{"id":"https://openalex.org/subfields/1703","display_name":"Computational Theory and Mathematics"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/constrained-optimization","display_name":"Constrained optimization","score":0.5871999859809875},{"id":"https://openalex.org/keywords/constraint","display_name":"Constraint (computer-aided design)","score":0.5835000276565552},{"id":"https://openalex.org/keywords/convergence","display_name":"Convergence (economics)","score":0.5569000244140625},{"id":"https://openalex.org/keywords/penalty-method","display_name":"Penalty method","score":0.5496000051498413},{"id":"https://openalex.org/keywords/stability","display_name":"Stability (learning theory)","score":0.548799991607666},{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.5116000175476074},{"id":"https://openalex.org/keywords/optimization-problem","display_name":"Optimization problem","score":0.5059000253677368}],"concepts":[{"id":"https://openalex.org/C126255220","wikidata":"https://www.wikidata.org/wiki/Q141495","display_name":"Mathematical optimization","level":1,"score":0.6692000031471252},{"id":"https://openalex.org/C55660270","wikidata":"https://www.wikidata.org/wiki/Q5164377","display_name":"Constrained optimization","level":2,"score":0.5871999859809875},{"id":"https://openalex.org/C2776036281","wikidata":"https://www.wikidata.org/wiki/Q48769818","display_name":"Constraint (computer-aided design)","level":2,"score":0.5835000276565552},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.5756999850273132},{"id":"https://openalex.org/C2777303404","wikidata":"https://www.wikidata.org/wiki/Q759757","display_name":"Convergence (economics)","level":2,"score":0.5569000244140625},{"id":"https://openalex.org/C6180225","wikidata":"https://www.wikidata.org/wiki/Q3411771","display_name":"Penalty method","level":2,"score":0.5496000051498413},{"id":"https://openalex.org/C112972136","wikidata":"https://www.wikidata.org/wiki/Q7595718","display_name":"Stability (learning theory)","level":2,"score":0.548799991607666},{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.5116000175476074},{"id":"https://openalex.org/C137836250","wikidata":"https://www.wikidata.org/wiki/Q984063","display_name":"Optimization problem","level":2,"score":0.5059000253677368},{"id":"https://openalex.org/C2989514635","wikidata":"https://www.wikidata.org/wiki/Q5164377","display_name":"Constrained optimization problem","level":3,"score":0.5023000240325928},{"id":"https://openalex.org/C14036430","wikidata":"https://www.wikidata.org/wiki/Q3736076","display_name":"Function (biology)","level":2,"score":0.4659000039100647},{"id":"https://openalex.org/C44616089","wikidata":"https://www.wikidata.org/wiki/Q30158686","display_name":"Constraint satisfaction","level":3,"score":0.36070001125335693},{"id":"https://openalex.org/C163863214","wikidata":"https://www.wikidata.org/wiki/Q7624553","display_name":"Strong duality","level":3,"score":0.3452000021934509},{"id":"https://openalex.org/C41045048","wikidata":"https://www.wikidata.org/wiki/Q202843","display_name":"Linear programming","level":2,"score":0.3066999912261963},{"id":"https://openalex.org/C2778023678","wikidata":"https://www.wikidata.org/wiki/Q554403","display_name":"Duality (order theory)","level":2,"score":0.2962999939918518},{"id":"https://openalex.org/C177067428","wikidata":"https://www.wikidata.org/wiki/Q17013331","display_name":"Feasible region","level":2,"score":0.29319998621940613}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1609/aaai.v40i32.39978","is_oa":true,"landing_page_url":"https://doi.org/10.1609/aaai.v40i32.39978","pdf_url":"https://ojs.aaai.org/index.php/AAAI/article/download/39978/43939","source":{"id":"https://openalex.org/S4210191458","display_name":"Proceedings of the AAAI Conference on Artificial Intelligence","issn_l":"2159-5399","issn":["2159-5399","2374-3468"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/P4310320058","host_organization_name":"Association for the Advancement of Artificial Intelligence","host_organization_lineage":["https://openalex.org/P4310320058"],"host_organization_lineage_names":["Association for the Advancement of Artificial Intelligence"],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the AAAI Conference on Artificial Intelligence","raw_type":"journal-article"}],"best_oa_location":{"id":"doi:10.1609/aaai.v40i32.39978","is_oa":true,"landing_page_url":"https://doi.org/10.1609/aaai.v40i32.39978","pdf_url":"https://ojs.aaai.org/index.php/AAAI/article/download/39978/43939","source":{"id":"https://openalex.org/S4210191458","display_name":"Proceedings of the AAAI Conference on Artificial Intelligence","issn_l":"2159-5399","issn":["2159-5399","2374-3468"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/P4310320058","host_organization_name":"Association for the Advancement of Artificial Intelligence","host_organization_lineage":["https://openalex.org/P4310320058"],"host_organization_lineage_names":["Association for the Advancement of Artificial Intelligence"],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the AAAI Conference on Artificial Intelligence","raw_type":"journal-article"},"sustainable_development_goals":[{"display_name":"Peace, Justice and strong institutions","score":0.8041086196899414,"id":"https://metadata.un.org/sdg/16"}],"awards":[{"id":"https://openalex.org/G1231421488","display_name":null,"funder_award_id":"under","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G2087396116","display_name":null,"funder_award_id":"China","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G3317480652","display_name":null,"funder_award_id":"Science","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G3825000620","display_name":null,"funder_award_id":"62301559","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G5994120800","display_name":null,"funder_award_id":"Natural","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"}],"has_content":{"pdf":true,"grobid_xml":true},"content_urls":{"pdf":"https://content.openalex.org/works/W7138158283.pdf","grobid_xml":"https://content.openalex.org/works/W7138158283.grobid-xml"},"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Safe":[0],"Reinforcement":[1],"Learning":[2],"(RL)":[3],"often":[4],"faces":[5],"significant":[6,158],"issues":[7],"such":[8],"as":[9,83],"constraint":[10,112],"violations":[11],"and":[12,52,118,125,184],"instability,":[13],"necessitating":[14],"the":[15,40,80,84,87,108,111,122,126,129,135,142,166],"use":[16],"of":[17,128],"constrained":[18,34],"policy":[19,85,109,148,174],"optimization,":[20],"which":[21,103],"seeks":[22],"optimal":[23],"policies":[24],"while":[25],"ensuring":[26],"adherence":[27],"to":[28,99,140],"specific":[29],"constraints":[30],"like":[31],"safety.":[32],"Typically,":[33],"optimization":[35,143,175],"problems":[36],"are":[37],"addressed":[38],"by":[39,55],"Lagrangian":[41],"method,":[42],"a":[43,59,70,90,95,147,170],"post-violation":[44],"remedial":[45],"approach":[46],"that":[47,68,155,165],"may":[48],"result":[49],"in":[50,160],"oscillations":[51],"overshoots.":[53],"Motivated":[54],"this,":[56],"we":[57,93,145],"propose":[58],"novel":[60],"method":[61],"named":[62],"Proactive":[63],"Constrained":[64],"Policy":[65],"Optimization":[66],"(PCPO)":[67],"incorporates":[69],"preemptive":[71],"penalty":[72],"mechanism.":[73],"This":[74],"mechanism":[75],"integrates":[76],"barrier":[77],"items":[78],"into":[79],"objective":[81],"function":[82],"nears":[86],"boundary,":[88],"imposing":[89],"cost.":[91],"Meanwhile,":[92],"introduce":[94],"constraint-aware":[96],"intrinsic":[97],"reward":[98],"guide":[100],"boundary-aware":[101],"exploration,":[102],"is":[104,154],"activated":[105],"only":[106],"when":[107],"approaches":[110],"boundary.":[113],"We":[114],"establish":[115],"theoretical":[116],"upper":[117],"lower":[119],"bounds":[120],"for":[121,173,181],"duality":[123],"gap":[124],"performance":[127],"PCPO":[130,156,167],"update,":[131],"shedding":[132],"light":[133],"on":[134],"method's":[136],"convergence":[137],"characteristics.":[138],"Additionally,":[139],"enhance":[141],"performance,":[144],"adopt":[146],"iteration":[149],"approach.":[150],"An":[151],"interesting":[152],"finding":[153],"demonstrates":[157],"stability":[159],"experiments.":[161],"Experimental":[162],"results":[163],"indicate":[164],"framework":[168],"provides":[169],"robust":[171],"solution":[172],"under":[176],"constraints,":[177],"with":[178],"important":[179],"implications":[180],"future":[182],"research":[183],"practical":[185],"applications.":[186]},"counts_by_year":[],"updated_date":"2026-04-03T22:45:19.894376","created_date":"2026-03-18T00:00:00"}
