{"id":"https://openalex.org/W4415428431","doi":"https://doi.org/10.3233/faia251154","title":"Safe APG: Accelerated Policy Gradient Algorithm for Secure Policy Updating in Reinforcement Learning","display_name":"Safe APG: Accelerated Policy Gradient Algorithm for Secure Policy Updating in Reinforcement Learning","publication_year":2025,"publication_date":"2025-10-21","ids":{"openalex":"https://openalex.org/W4415428431","doi":"https://doi.org/10.3233/faia251154"},"language":null,"primary_location":{"id":"doi:10.3233/faia251154","is_oa":true,"landing_page_url":"https://doi.org/10.3233/faia251154","pdf_url":null,"source":{"id":"https://openalex.org/S4210201731","display_name":"Frontiers in artificial intelligence and applications","issn_l":"0922-6389","issn":["0922-6389","1879-8314"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"journal"},"license":"cc-by-nc","license_id":"https://openalex.org/licenses/cc-by-nc","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Frontiers in Artificial Intelligence and Applications","raw_type":"book-chapter"},"type":"book-chapter","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"hybrid","oa_url":"https://doi.org/10.3233/faia251154","any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5103486185","display_name":"Jianan Lin","orcid":null},"institutions":[{"id":"https://openalex.org/I204831749","display_name":"Southwestern University of Finance and Economics","ror":"https://ror.org/04ewct822","country_code":"CN","type":"education","lineage":["https://openalex.org/I204831749"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Jianan Lin","raw_affiliation_strings":["Southwestern University of Finance and Economics, Chengdu, China"],"affiliations":[{"raw_affiliation_string":"Southwestern University of Finance and Economics, Chengdu, China","institution_ids":["https://openalex.org/I204831749"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100394258","display_name":"Yao Chen","orcid":"https://orcid.org/0000-0002-6505-4670"},"institutions":[{"id":"https://openalex.org/I204831749","display_name":"Southwestern University of Finance and Economics","ror":"https://ror.org/04ewct822","country_code":"CN","type":"education","lineage":["https://openalex.org/I204831749"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yao Chen","raw_affiliation_strings":["Southwestern University of Finance and Economics, Chengdu, China"],"affiliations":[{"raw_affiliation_string":"Southwestern University of Finance and Economics, Chengdu, China","institution_ids":["https://openalex.org/I204831749"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5113368690","display_name":"Zhengyang Ji","orcid":null},"institutions":[{"id":"https://openalex.org/I55712492","display_name":"Zhejiang University of Technology","ror":"https://ror.org/02djqfd08","country_code":"CN","type":"education","lineage":["https://openalex.org/I55712492"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Zhengyang Ji","raw_affiliation_strings":["Zhejiang University of Technology, Hangzhou, China"],"affiliations":[{"raw_affiliation_string":"Zhejiang University of Technology, Hangzhou, China","institution_ids":["https://openalex.org/I55712492"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5090568718","display_name":"Meng Yuan","orcid":"https://orcid.org/0000-0003-1381-9202"},"institutions":[{"id":"https://openalex.org/I204831749","display_name":"Southwestern University of Finance and Economics","ror":"https://ror.org/04ewct822","country_code":"CN","type":"education","lineage":["https://openalex.org/I204831749"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Meng Yuan","raw_affiliation_strings":["Southwestern University of Finance and Economics, Chengdu, China"],"affiliations":[{"raw_affiliation_string":"Southwestern University of Finance and Economics, Chengdu, China","institution_ids":["https://openalex.org/I204831749"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5102948451","display_name":"Bo Hou","orcid":"https://orcid.org/0000-0002-3513-8187"},"institutions":[{"id":"https://openalex.org/I2801618472","display_name":"PLA Rocket Force University of Engineering","ror":"https://ror.org/00gg5zj35","country_code":"CN","type":"education","lineage":["https://openalex.org/I2801618472"]},{"id":"https://openalex.org/I17145004","display_name":"Northwestern Polytechnical University","ror":"https://ror.org/01y0j0j86","country_code":"CN","type":"education","lineage":["https://openalex.org/I17145004"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Bo Hou","raw_affiliation_strings":["Northwestern Polytechnical University, Xi\u2019an, China","Rocket Force Engineering University, Xi\u2019an, China"],"affiliations":[{"raw_affiliation_string":"Northwestern Polytechnical University, Xi\u2019an, China","institution_ids":["https://openalex.org/I17145004"]},{"raw_affiliation_string":"Rocket Force Engineering University, Xi\u2019an, China","institution_ids":["https://openalex.org/I2801618472"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5037190906","display_name":"Shaolin Tan","orcid":"https://orcid.org/0000-0001-6549-9760"},"institutions":[{"id":"https://openalex.org/I125839683","display_name":"Beijing Institute of Technology","ror":"https://ror.org/01skt4w74","country_code":"CN","type":"education","lineage":["https://openalex.org/I125839683","https://openalex.org/I890469752"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Shaolin Tan","raw_affiliation_strings":["The Zhongguancun Laboratory, Beijing, China"],"affiliations":[{"raw_affiliation_string":"The Zhongguancun Laboratory, Beijing, China","institution_ids":["https://openalex.org/I125839683"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5103486185"],"corresponding_institution_ids":["https://openalex.org/I204831749"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.50647824,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9232000112533569,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9232000112533569,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.854200005531311},{"id":"https://openalex.org/keywords/function","display_name":"Function (biology)","score":0.5509999990463257},{"id":"https://openalex.org/keywords/convergence","display_name":"Convergence (economics)","score":0.5370000004768372},{"id":"https://openalex.org/keywords/property","display_name":"Property (philosophy)","score":0.3991999924182892},{"id":"https://openalex.org/keywords/perturbation","display_name":"Perturbation (astronomy)","score":0.3677000105381012},{"id":"https://openalex.org/keywords/core","display_name":"Core (optical fiber)","score":0.3637000024318695},{"id":"https://openalex.org/keywords/temporal-difference-learning","display_name":"Temporal difference learning","score":0.3481000065803528}],"concepts":[{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.854200005531311},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6917999982833862},{"id":"https://openalex.org/C14036430","wikidata":"https://www.wikidata.org/wiki/Q3736076","display_name":"Function (biology)","level":2,"score":0.5509999990463257},{"id":"https://openalex.org/C2777303404","wikidata":"https://www.wikidata.org/wiki/Q759757","display_name":"Convergence (economics)","level":2,"score":0.5370000004768372},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4641000032424927},{"id":"https://openalex.org/C189950617","wikidata":"https://www.wikidata.org/wiki/Q937228","display_name":"Property (philosophy)","level":2,"score":0.3991999924182892},{"id":"https://openalex.org/C177918212","wikidata":"https://www.wikidata.org/wiki/Q803623","display_name":"Perturbation (astronomy)","level":2,"score":0.3677000105381012},{"id":"https://openalex.org/C2164484","wikidata":"https://www.wikidata.org/wiki/Q5170150","display_name":"Core (optical fiber)","level":2,"score":0.3637000024318695},{"id":"https://openalex.org/C196340769","wikidata":"https://www.wikidata.org/wiki/Q7698910","display_name":"Temporal difference learning","level":3,"score":0.3481000065803528},{"id":"https://openalex.org/C99498987","wikidata":"https://www.wikidata.org/wiki/Q2210247","display_name":"Noise (video)","level":3,"score":0.34779998660087585},{"id":"https://openalex.org/C115680565","wikidata":"https://www.wikidata.org/wiki/Q5977448","display_name":"Gradient method","level":2,"score":0.34610000252723694},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.33640000224113464},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.32350000739097595},{"id":"https://openalex.org/C2986577269","wikidata":"https://www.wikidata.org/wiki/Q11306265","display_name":"Random noise","level":2,"score":0.28630000352859497},{"id":"https://openalex.org/C126255220","wikidata":"https://www.wikidata.org/wiki/Q141495","display_name":"Mathematical optimization","level":1,"score":0.28029999136924744},{"id":"https://openalex.org/C67203356","wikidata":"https://www.wikidata.org/wiki/Q1321905","display_name":"Reinforcement","level":2,"score":0.2709999978542328},{"id":"https://openalex.org/C154908896","wikidata":"https://www.wikidata.org/wiki/Q2167404","display_name":"Security policy","level":2,"score":0.2599000036716461},{"id":"https://openalex.org/C89109886","wikidata":"https://www.wikidata.org/wiki/Q1535924","display_name":"Trust region","level":3,"score":0.25850000977516174},{"id":"https://openalex.org/C2776889888","wikidata":"https://www.wikidata.org/wiki/Q1135789","display_name":"Unintended consequences","level":2,"score":0.25850000977516174}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.3233/faia251154","is_oa":true,"landing_page_url":"https://doi.org/10.3233/faia251154","pdf_url":null,"source":{"id":"https://openalex.org/S4210201731","display_name":"Frontiers in artificial intelligence and applications","issn_l":"0922-6389","issn":["0922-6389","1879-8314"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"journal"},"license":"cc-by-nc","license_id":"https://openalex.org/licenses/cc-by-nc","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Frontiers in Artificial Intelligence and Applications","raw_type":"book-chapter"}],"best_oa_location":{"id":"doi:10.3233/faia251154","is_oa":true,"landing_page_url":"https://doi.org/10.3233/faia251154","pdf_url":null,"source":{"id":"https://openalex.org/S4210201731","display_name":"Frontiers in artificial intelligence and applications","issn_l":"0922-6389","issn":["0922-6389","1879-8314"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"journal"},"license":"cc-by-nc","license_id":"https://openalex.org/licenses/cc-by-nc","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Frontiers in Artificial Intelligence and Applications","raw_type":"book-chapter"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Inverse":[0],"reinforcement":[1,134],"learning":[2,65,114,135],"(IRL)":[3],"aims":[4],"to":[5,41,63,86],"infer":[6],"the":[7,68,105,113,120,139,157,160],"reward":[8,30,77,146],"function":[9,31,147],"from":[10,74,112,128],"expert":[11],"demonstrations.":[12],"However,":[13],"as":[14,24,35,116,118],"IRL":[15],"techniques":[16],"are":[17],"increasingly":[18],"applied":[19],"in":[20,133],"high-stakes":[21],"domains":[22],"such":[23],"autonomous":[25],"driving":[26],"and":[27,45,91,131],"military":[28],"decision-making,":[29],"leakage":[32],"has":[33],"emerged":[34],"a":[36,60,88],"critical":[37,109],"risk,":[38],"potentially":[39],"leading":[40],"severe":[42],"security":[43,66],"threats":[44],"unintended":[46],"consequences.":[47],"To":[48],"address":[49],"this":[50],"challenge,":[51],"we":[52],"propose":[53],"Safe":[54,83],"Accelerated":[55,98],"Policy":[56],"Gradient":[57,99],"(Safe":[58],"APG),":[59],"method":[61,141],"designed":[62],"enhance":[64],"of":[67,107,124,159],"demonstrating":[69],"agent":[70,115],"by":[71],"preventing":[72],"observers":[73],"inferring":[75],"its":[76],"function.":[78],"The":[79,126],"core":[80],"idea":[81],"behind":[82],"APG":[84],"is":[85],"incorporate":[87],"delicately":[89],"constructed":[90],"theoretically":[92],"guaranteed":[93],"structural":[94,162],"noise":[95],"into":[96],"Nesterov\u2019s":[97],"(NAG)":[100],"for":[101],"policy":[102],"updating,":[103],"with":[104],"goal":[106],"concealing":[108],"gradient":[110],"information":[111],"well":[117],"keeping":[119],"geometric":[121],"convergence":[122,153],"property":[123],"NAG.":[125],"results":[127],"numerical":[129],"experiments":[130],"simulations":[132],"environments":[136],"demonstrate":[137],"that":[138],"proposed":[140],"not":[142],"only":[143],"significantly":[144],"mitigates":[145],"leakage,":[148],"but":[149],"also":[150],"achieves":[151],"superior":[152],"rates":[154],"even":[155],"under":[156],"perturbation":[158],"introduced":[161],"noise.":[163]},"counts_by_year":[],"updated_date":"2026-03-07T16:01:11.037858","created_date":"2025-10-24T00:00:00"}
