{"id":"https://openalex.org/W4415883101","doi":"https://doi.org/10.1109/tsmc.2025.3627478","title":"PfoPG: A Personalized Federated First-Order Policy Gradient Algorithm and Its Nonasymptotic Analysis","display_name":"PfoPG: A Personalized Federated First-Order Policy Gradient Algorithm and Its Nonasymptotic Analysis","publication_year":2025,"publication_date":"2025-11-04","ids":{"openalex":"https://openalex.org/W4415883101","doi":"https://doi.org/10.1109/tsmc.2025.3627478"},"language":null,"primary_location":{"id":"doi:10.1109/tsmc.2025.3627478","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tsmc.2025.3627478","pdf_url":null,"source":{"id":"https://openalex.org/S4210209078","display_name":"IEEE Transactions on Systems Man and Cybernetics Systems","issn_l":"2168-2216","issn":["2168-2216","2168-2232"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Systems, Man, and Cybernetics: Systems","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5050705532","display_name":"Junlong Zhu","orcid":"https://orcid.org/0000-0002-6411-7035"},"institutions":[{"id":"https://openalex.org/I167383011","display_name":"Henan University of Science and Technology","ror":"https://ror.org/05d80kz58","country_code":"CN","type":"education","lineage":["https://openalex.org/I167383011"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Junlong Zhu","raw_affiliation_strings":["School of Information Engineering, Henan University of Science and Technology, Luoyang, China"],"raw_orcid":"https://orcid.org/0000-0002-6411-7035","affiliations":[{"raw_affiliation_string":"School of Information Engineering, Henan University of Science and Technology, Luoyang, China","institution_ids":["https://openalex.org/I167383011"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5030996045","display_name":"Hao Dong","orcid":"https://orcid.org/0000-0001-6638-4420"},"institutions":[{"id":"https://openalex.org/I167383011","display_name":"Henan University of Science and Technology","ror":"https://ror.org/05d80kz58","country_code":"CN","type":"education","lineage":["https://openalex.org/I167383011"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Haotong Dong","raw_affiliation_strings":["School of Information Engineering, Henan University of Science and Technology, Luoyang, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"School of Information Engineering, Henan University of Science and Technology, Luoyang, China","institution_ids":["https://openalex.org/I167383011"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5087014857","display_name":"Mingchuan Zhang","orcid":"https://orcid.org/0000-0002-2523-1089"},"institutions":[{"id":"https://openalex.org/I167383011","display_name":"Henan University of Science and Technology","ror":"https://ror.org/05d80kz58","country_code":"CN","type":"education","lineage":["https://openalex.org/I167383011"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Mingchuan Zhang","raw_affiliation_strings":["School of Information Engineering, Henan University of Science and Technology, Luoyang, China"],"raw_orcid":"https://orcid.org/0000-0002-2523-1089","affiliations":[{"raw_affiliation_string":"School of Information Engineering, Henan University of Science and Technology, Luoyang, China","institution_ids":["https://openalex.org/I167383011"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5110850632","display_name":"Gaofeng Chen","orcid":null},"institutions":[{"id":"https://openalex.org/I167383011","display_name":"Henan University of Science and Technology","ror":"https://ror.org/05d80kz58","country_code":"CN","type":"education","lineage":["https://openalex.org/I167383011"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Gaofeng Chen","raw_affiliation_strings":["School of Information Engineering, Henan University of Science and Technology, Luoyang, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"School of Information Engineering, Henan University of Science and Technology, Luoyang, China","institution_ids":["https://openalex.org/I167383011"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5069590254","display_name":"Ruijuan Zheng","orcid":"https://orcid.org/0000-0002-0932-8788"},"institutions":[{"id":"https://openalex.org/I167383011","display_name":"Henan University of Science and Technology","ror":"https://ror.org/05d80kz58","country_code":"CN","type":"education","lineage":["https://openalex.org/I167383011"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Ruijuan Zheng","raw_affiliation_strings":["School of Information Engineering, Henan University of Science and Technology, Luoyang, China"],"raw_orcid":"https://orcid.org/0000-0002-0932-8788","affiliations":[{"raw_affiliation_string":"School of Information Engineering, Henan University of Science and Technology, Luoyang, China","institution_ids":["https://openalex.org/I167383011"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5053095481","display_name":"Quanbo Ge","orcid":"https://orcid.org/0000-0002-0044-6059"},"institutions":[{"id":"https://openalex.org/I200845125","display_name":"Nanjing University of Information Science and Technology","ror":"https://ror.org/02y0rxk19","country_code":"CN","type":"education","lineage":["https://openalex.org/I200845125"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Quanbo Ge","raw_affiliation_strings":["School of Automation, Nanjing University of Information Science and Technology, Nanjing, Jiangsu, China"],"raw_orcid":"https://orcid.org/0000-0002-0044-6059","affiliations":[{"raw_affiliation_string":"School of Automation, Nanjing University of Information Science and Technology, Nanjing, Jiangsu, China","institution_ids":["https://openalex.org/I200845125"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5080439384","display_name":"Qingtao Wu","orcid":"https://orcid.org/0000-0003-1572-5293"},"institutions":[{"id":"https://openalex.org/I167383011","display_name":"Henan University of Science and Technology","ror":"https://ror.org/05d80kz58","country_code":"CN","type":"education","lineage":["https://openalex.org/I167383011"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Qingtao Wu","raw_affiliation_strings":["School of Information Engineering, Henan University of Science and Technology, Luoyang, China"],"raw_orcid":"https://orcid.org/0000-0003-1572-5293","affiliations":[{"raw_affiliation_string":"School of Information Engineering, Henan University of Science and Technology, Luoyang, China","institution_ids":["https://openalex.org/I167383011"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":7,"corresponding_author_ids":["https://openalex.org/A5050705532"],"corresponding_institution_ids":["https://openalex.org/I167383011"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.16380365,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":"56","issue":"1","first_page":"192","last_page":"203"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.8321999907493591,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.8321999907493591,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12794","display_name":"Adaptive Dynamic Programming Control","score":0.04839999973773956,"subfield":{"id":"https://openalex.org/subfields/1703","display_name":"Computational Theory and Mathematics"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11612","display_name":"Stochastic Gradient Optimization Techniques","score":0.022199999541044235,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/convergence","display_name":"Convergence (economics)","score":0.7404000163078308},{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.5428000092506409},{"id":"https://openalex.org/keywords/rate-of-convergence","display_name":"Rate of convergence","score":0.524399995803833},{"id":"https://openalex.org/keywords/federated-learning","display_name":"Federated learning","score":0.5181999802589417},{"id":"https://openalex.org/keywords/order","display_name":"Order (exchange)","score":0.4269999861717224},{"id":"https://openalex.org/keywords/mixing","display_name":"Mixing (physics)","score":0.39469999074935913}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7534999847412109},{"id":"https://openalex.org/C2777303404","wikidata":"https://www.wikidata.org/wiki/Q759757","display_name":"Convergence (economics)","level":2,"score":0.7404000163078308},{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.5428000092506409},{"id":"https://openalex.org/C57869625","wikidata":"https://www.wikidata.org/wiki/Q1783502","display_name":"Rate of convergence","level":3,"score":0.524399995803833},{"id":"https://openalex.org/C2992525071","wikidata":"https://www.wikidata.org/wiki/Q50818671","display_name":"Federated learning","level":2,"score":0.5181999802589417},{"id":"https://openalex.org/C126255220","wikidata":"https://www.wikidata.org/wiki/Q141495","display_name":"Mathematical optimization","level":1,"score":0.44589999318122864},{"id":"https://openalex.org/C182306322","wikidata":"https://www.wikidata.org/wiki/Q1779371","display_name":"Order (exchange)","level":2,"score":0.4269999861717224},{"id":"https://openalex.org/C138777275","wikidata":"https://www.wikidata.org/wiki/Q6884054","display_name":"Mixing (physics)","level":2,"score":0.39469999074935913},{"id":"https://openalex.org/C2779436431","wikidata":"https://www.wikidata.org/wiki/Q30672407","display_name":"Policy learning","level":2,"score":0.36399999260902405},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.3605000078678131},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.35580000281333923},{"id":"https://openalex.org/C115680565","wikidata":"https://www.wikidata.org/wiki/Q5977448","display_name":"Gradient method","level":2,"score":0.32190001010894775},{"id":"https://openalex.org/C153258448","wikidata":"https://www.wikidata.org/wiki/Q1199743","display_name":"Gradient descent","level":3,"score":0.31130000948905945},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.304500013589859},{"id":"https://openalex.org/C89109886","wikidata":"https://www.wikidata.org/wiki/Q1535924","display_name":"Trust region","level":3,"score":0.2838999927043915},{"id":"https://openalex.org/C77618280","wikidata":"https://www.wikidata.org/wiki/Q1155772","display_name":"Scheme (mathematics)","level":2,"score":0.27880001068115234},{"id":"https://openalex.org/C189237950","wikidata":"https://www.wikidata.org/wiki/Q2500758","display_name":"Stationary point","level":2,"score":0.25119999051094055}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/tsmc.2025.3627478","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tsmc.2025.3627478","pdf_url":null,"source":{"id":"https://openalex.org/S4210209078","display_name":"IEEE Transactions on Systems Man and Cybernetics Systems","issn_l":"2168-2216","issn":["2168-2216","2168-2232"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Systems, Man, and Cybernetics: Systems","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G1413938862","display_name":null,"funder_award_id":"61976243","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G4523051586","display_name":null,"funder_award_id":"23ZX003","funder_id":"https://openalex.org/F4320335955","funder_display_name":"Key Scientific Research Project of Colleges and Universities in Henan Province"},{"id":"https://openalex.org/G5430684331","display_name":null,"funder_award_id":"62172142","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"},{"id":"https://openalex.org/F4320335955","display_name":"Key Scientific Research Project of Colleges and Universities in Henan Province","ror":null}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":24,"referenced_works":["https://openalex.org/W2046859786","https://openalex.org/W2119717200","https://openalex.org/W2144446635","https://openalex.org/W2911450448","https://openalex.org/W2968937098","https://openalex.org/W2972087877","https://openalex.org/W2998717292","https://openalex.org/W3080934299","https://openalex.org/W3104631511","https://openalex.org/W3127561923","https://openalex.org/W3133814152","https://openalex.org/W3173294282","https://openalex.org/W3202016380","https://openalex.org/W3205598877","https://openalex.org/W4200635498","https://openalex.org/W4282924988","https://openalex.org/W4313136702","https://openalex.org/W4361854800","https://openalex.org/W4378804862","https://openalex.org/W4386473383","https://openalex.org/W4390659346","https://openalex.org/W4401508090","https://openalex.org/W4401508520","https://openalex.org/W4403407853"],"related_works":[],"abstract_inverted_index":{"This":[0],"article":[1,55,101],"revisits":[2],"the":[3,13,41,85,104,130,136,145,165,174],"federated":[4,25,35,61,138],"policy":[5,15,36,46,63,74,117,139],"gradient":[6,37,64,140],"algorithm":[7],"with":[8,118],"environment":[9],"heterogeneity":[10],"for":[11],"finding":[12],"optimal":[14,79],"in":[16],"multiagent":[17,175],"reinforcement":[18],"learning":[19],"(RL).":[20],"Toward":[21],"this":[22,52,54,100],"direction,":[23],"personalized":[24,34,42,60,73],"RL":[26],"methods":[27,38],"have":[28],"been":[29],"presented":[30],"recently.":[31],"However,":[32],"existing":[33],"may":[39],"confine":[40],"capacity":[43],"of":[44,108,132,167],"local":[45,82],"models.":[47],"In":[48],"order":[49],"to":[50,67,94,114,135,156],"tackle":[51],"challenge,":[53],"develops":[56],"a":[57,72,115],"provably":[58],"convergent":[59],"first-order":[62],"algorithm,":[65],"referred":[66],"as":[68],"PfoPG,":[69],"which":[70],"learns":[71],"model":[75],"by":[76,169],"adaptively":[77],"mixing":[78],"global":[80],"and":[81],"policies.":[83],"Moreover,":[84],"momentum-based":[86],"importance":[87],"sampling":[88],"is":[89],"also":[90],"introduced":[91],"into":[92],"PfoPG":[93,112,142,168],"improve":[95,144],"its":[96],"convergence":[97,106,146],"speed.":[98],"Meanwhile,":[99],"rigorously":[102],"analyzes":[103],"nonasymptotic":[105],"behavior":[107],"PfoPG.":[109],"More":[110],"specifically,":[111],"converges":[113],"stationary":[116],"rate":[119,147],"<italic":[120,126,149,157],"xmlns:mml=\"http://www.w3.org/1998/Math/MathML\"":[121,123,127,150,152,154,158,160],"xmlns:xlink=\"http://www.w3.org/1999/xlink\">O</i>(1/<italic":[122,151,159],"xmlns:xlink=\"http://www.w3.org/1999/xlink\">K</i>),":[124],"where":[125],"xmlns:xlink=\"http://www.w3.org/1999/xlink\">K</i>":[128],"denotes":[129],"number":[131],"iterations.":[133],"Compared":[134],"state-of-the-art":[137],"methods,":[141],"can":[143],"from":[148],"xmlns:xlink=\"http://www.w3.org/1999/xlink\">K</i><sup":[153],"xmlns:xlink=\"http://www.w3.org/1999/xlink\">2/3</sup>)":[155],"xmlns:xlink=\"http://www.w3.org/1999/xlink\">K</i>).":[161],"Finally,":[162],"we":[163],"verify":[164],"effectiveness":[166],"various":[170],"experiments":[171],"based":[172],"on":[173],"particle":[176],"environment.":[177]},"counts_by_year":[],"updated_date":"2025-12-19T00:32:22.182498","created_date":"2025-11-04T00:00:00"}
