{"id":"https://openalex.org/W3172611114","doi":"https://doi.org/10.1145/3452008","title":"PP-PG: Combining Parameter Perturbation with Policy Gradient Methods for Effective and Efficient Explorations in Deep Reinforcement Learning","display_name":"PP-PG: Combining Parameter Perturbation with Policy Gradient Methods for Effective and Efficient Explorations in Deep Reinforcement Learning","publication_year":2021,"publication_date":"2021-06-03","ids":{"openalex":"https://openalex.org/W3172611114","doi":"https://doi.org/10.1145/3452008","mag":"3172611114"},"language":"en","primary_location":{"id":"doi:10.1145/3452008","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3452008","pdf_url":null,"source":{"id":"https://openalex.org/S2492086750","display_name":"ACM Transactions on Intelligent Systems and Technology","issn_l":"2157-6904","issn":["2157-6904","2157-6912"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM Transactions on Intelligent Systems and Technology","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5066237646","display_name":"Shilei Li","orcid":"https://orcid.org/0000-0003-4870-881X"},"institutions":[{"id":"https://openalex.org/I2800710378","display_name":"Naval University of Engineering","ror":"https://ror.org/056vyez31","country_code":"CN","type":"education","lineage":["https://openalex.org/I2800710378"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Shilei Li","raw_affiliation_strings":["Department of Information Security, Naval University of Engineering, Wuhan, China"],"raw_orcid":"https://orcid.org/0000-0003-4870-881X","affiliations":[{"raw_affiliation_string":"Department of Information Security, Naval University of Engineering, Wuhan, China","institution_ids":["https://openalex.org/I2800710378"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100457502","display_name":"Meng Li","orcid":"https://orcid.org/0000-0002-7212-2264"},"institutions":[{"id":"https://openalex.org/I4210115169","display_name":"Second Artillery General Hospital of Chinese People's Liberation Army","ror":"https://ror.org/0264qnp36","country_code":"CN","type":"healthcare","lineage":["https://openalex.org/I4210115169"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Meng Li","raw_affiliation_strings":["Army Academy of Artillery and Air Defense, Hefei, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Army Academy of Artillery and Air Defense, Hefei, China","institution_ids":["https://openalex.org/I4210115169"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5108313535","display_name":"Jiongming Su","orcid":null},"institutions":[{"id":"https://openalex.org/I170215575","display_name":"National University of Defense Technology","ror":"https://ror.org/05d2yfz11","country_code":"CN","type":"education","lineage":["https://openalex.org/I170215575"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jiongming Su","raw_affiliation_strings":["College of Intelligence Science and Technology, National University of Defense Technology, Changsha, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"College of Intelligence Science and Technology, National University of Defense Technology, Changsha, China","institution_ids":["https://openalex.org/I170215575"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5081802331","display_name":"Shaofei Chen","orcid":"https://orcid.org/0000-0001-6823-555X"},"institutions":[{"id":"https://openalex.org/I170215575","display_name":"National University of Defense Technology","ror":"https://ror.org/05d2yfz11","country_code":"CN","type":"education","lineage":["https://openalex.org/I170215575"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Shaofei Chen","raw_affiliation_strings":["College of Intelligence Science and Technology, National University of Defense Technology, Changsha, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"College of Intelligence Science and Technology, National University of Defense Technology, Changsha, China","institution_ids":["https://openalex.org/I170215575"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5016848462","display_name":"Zhimin Yuan","orcid":"https://orcid.org/0000-0003-4587-8370"},"institutions":[{"id":"https://openalex.org/I2800710378","display_name":"Naval University of Engineering","ror":"https://ror.org/056vyez31","country_code":"CN","type":"education","lineage":["https://openalex.org/I2800710378"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Zhimin Yuan","raw_affiliation_strings":["Department of Information Security, Naval University of Engineering, Wuhan, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Department of Information Security, Naval University of Engineering, Wuhan, China","institution_ids":["https://openalex.org/I2800710378"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5100347583","display_name":"Qing Ye","orcid":"https://orcid.org/0009-0005-7100-5903"},"institutions":[{"id":"https://openalex.org/I2800710378","display_name":"Naval University of Engineering","ror":"https://ror.org/056vyez31","country_code":"CN","type":"education","lineage":["https://openalex.org/I2800710378"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Qing Ye","raw_affiliation_strings":["Department of Information Security, Naval University of Engineering, Wuhan, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Department of Information Security, Naval University of Engineering, Wuhan, China","institution_ids":["https://openalex.org/I2800710378"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5066237646"],"corresponding_institution_ids":["https://openalex.org/I2800710378"],"apc_list":null,"apc_paid":null,"fwci":0.1399,"has_fulltext":false,"cited_by_count":1,"citation_normalized_percentile":{"value":0.5422257,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":94},"biblio":{"volume":"12","issue":"3","first_page":"1","last_page":"21"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9994999766349792,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9994999766349792,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12794","display_name":"Adaptive Dynamic Programming Control","score":0.942799985408783,"subfield":{"id":"https://openalex.org/subfields/1703","display_name":"Computational Theory and Mathematics"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.8736349940299988},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7318069934844971},{"id":"https://openalex.org/keywords/parametric-statistics","display_name":"Parametric statistics","score":0.5720682740211487},{"id":"https://openalex.org/keywords/perturbation","display_name":"Perturbation (astronomy)","score":0.5097169280052185},{"id":"https://openalex.org/keywords/action","display_name":"Action (physics)","score":0.5066271424293518},{"id":"https://openalex.org/keywords/population","display_name":"Population","score":0.46751028299331665},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.4557395577430725},{"id":"https://openalex.org/keywords/mathematical-optimization","display_name":"Mathematical optimization","score":0.4267078936100006},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.34291765093803406},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.18823334574699402},{"id":"https://openalex.org/keywords/physics","display_name":"Physics","score":0.07203042507171631}],"concepts":[{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.8736349940299988},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7318069934844971},{"id":"https://openalex.org/C117251300","wikidata":"https://www.wikidata.org/wiki/Q1849855","display_name":"Parametric statistics","level":2,"score":0.5720682740211487},{"id":"https://openalex.org/C177918212","wikidata":"https://www.wikidata.org/wiki/Q803623","display_name":"Perturbation (astronomy)","level":2,"score":0.5097169280052185},{"id":"https://openalex.org/C2780791683","wikidata":"https://www.wikidata.org/wiki/Q846785","display_name":"Action (physics)","level":2,"score":0.5066271424293518},{"id":"https://openalex.org/C2908647359","wikidata":"https://www.wikidata.org/wiki/Q2625603","display_name":"Population","level":2,"score":0.46751028299331665},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4557395577430725},{"id":"https://openalex.org/C126255220","wikidata":"https://www.wikidata.org/wiki/Q141495","display_name":"Mathematical optimization","level":1,"score":0.4267078936100006},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.34291765093803406},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.18823334574699402},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.07203042507171631},{"id":"https://openalex.org/C149923435","wikidata":"https://www.wikidata.org/wiki/Q37732","display_name":"Demography","level":1,"score":0.0},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C144024400","wikidata":"https://www.wikidata.org/wiki/Q21201","display_name":"Sociology","level":0,"score":0.0},{"id":"https://openalex.org/C105795698","wikidata":"https://www.wikidata.org/wiki/Q12483","display_name":"Statistics","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3452008","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3452008","pdf_url":null,"source":{"id":"https://openalex.org/S2492086750","display_name":"ACM Transactions on Intelligent Systems and Technology","issn_l":"2157-6904","issn":["2157-6904","2157-6912"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM Transactions on Intelligent Systems and Technology","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G552647032","display_name":null,"funder_award_id":"2014M562555","funder_id":"https://openalex.org/F4320321543","funder_display_name":"China Postdoctoral Science Foundation"},{"id":"https://openalex.org/G7957059847","display_name":null,"funder_award_id":"61202338","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"},{"id":"https://openalex.org/F4320321543","display_name":"China Postdoctoral Science Foundation","ror":"https://ror.org/0426zh255"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":36,"referenced_works":["https://openalex.org/W119114865","https://openalex.org/W779494576","https://openalex.org/W1608057971","https://openalex.org/W1674110665","https://openalex.org/W1974150877","https://openalex.org/W2004303440","https://openalex.org/W2085627234","https://openalex.org/W2111935653","https://openalex.org/W2123327324","https://openalex.org/W2171658832","https://openalex.org/W2434014514","https://openalex.org/W2565678376","https://openalex.org/W2593766708","https://openalex.org/W2596367596","https://openalex.org/W2596982695","https://openalex.org/W2598247389","https://openalex.org/W2603088459","https://openalex.org/W2614839826","https://openalex.org/W2623491082","https://openalex.org/W2736601468","https://openalex.org/W2744921630","https://openalex.org/W2778749116","https://openalex.org/W2781726626","https://openalex.org/W2785542505","https://openalex.org/W2787938642","https://openalex.org/W2788862220","https://openalex.org/W2794711922","https://openalex.org/W2809844857","https://openalex.org/W2894662639","https://openalex.org/W2906697496","https://openalex.org/W2949608212","https://openalex.org/W2962687375","https://openalex.org/W2963099939","https://openalex.org/W2963979925","https://openalex.org/W3013662176","https://openalex.org/W3037207827"],"related_works":["https://openalex.org/W4306904969","https://openalex.org/W2138720691","https://openalex.org/W4362501864","https://openalex.org/W4380318855","https://openalex.org/W3084456289","https://openalex.org/W2024136090","https://openalex.org/W4391331176","https://openalex.org/W2031695474","https://openalex.org/W2586732548","https://openalex.org/W2964765435"],"abstract_inverted_index":{"Efficient":[0],"and":[1,17,58,82,113,145,200,203,233,267,274],"stable":[2],"exploration":[3,28,35],"remains":[4],"a":[5,21,55,71,96,110,133,155,175,210,235,253,269],"key":[6],"challenge":[7],"for":[8,158,183],"deep":[9,84],"reinforcement":[10,89],"learning":[11,90],"(DRL)":[12],"operating":[13],"in":[14,29,36,70,75,95,99,109,132,162,209],"high-dimensional":[15],"action":[16,31,101],"state":[18],"spaces.":[19],"Recently,":[20],"more":[22,111,211],"promising":[23],"approach":[24],"by":[25,61,122,136,142,239],"combining":[26,62],"the":[27,30,34,37,45,63,76,83,100,119,123,138,143,146,159,163,168,179,184,196,201,219,229,240,249],"space":[32,39,78,102],"with":[33,79],"parameters":[38,77],"has":[40],"been":[41],"proposed":[42],"to":[43,103,166,177,186,243,246],"get":[44],"best":[46,160],"of":[47,255],"both":[48],"methods.":[49],"In":[50,116,171],"this":[51],"article,":[52],"we":[53,173],"propose":[54,174],"new":[56,236],"iterative":[57],"close-loop":[59],"framework":[60],"evolutionary":[64],"algorithm":[65,223,263],"(EA),":[66],"which":[67,92],"does":[68,93],"explorations":[69,94],"gradient-free":[72],"manner":[73,98,135],"directly":[74],"an":[80],"actor-critic,":[81],"deterministic":[85],"policy":[86,147],"gradient":[87,139,148,190],"(DDPG)":[88,150],"algorithm,":[91],"gradient-based":[97],"make":[104],"these":[105],"two":[106],"methods":[107],"cooperate":[108],"balanced":[112,212],"efficient":[114],"way.":[115],"our":[117,222,262],"framework,":[118],"policies":[120],"represented":[121],"EA":[124,164,197,204,241],"population":[125,165],"(the":[126],"parametric":[127],"perturbation":[128],"part)":[129],"can":[130,192,206,224],"evolve":[131],"guided":[134],"utilizing":[137],"information":[140,191],"provided":[141],"DDPG":[144,185,202,220],"part":[149,205],"is":[151],"used":[152],"only":[153],"as":[154],"fine-tuning":[156,228,234],"tool":[157],"individual":[161],"improve":[167,248],"sample":[169,275],"efficiency.":[170,250,276],"particular,":[172],"criterion":[176],"determine":[178],"training":[180],"steps":[181],"required":[182],"ensure":[187],"that":[188,261],"useful":[189],"be":[193],"generated":[194,198,238],"from":[195],"samples":[199],"work":[207],"together":[208],"way":[213],"during":[214],"each":[215],"generation.":[216],"Furthermore,":[217],"within":[218],"part,":[221],"flexibly":[225],"switch":[226],"between":[227,272],"same":[230],"previous":[231],"RL-Actor":[232],"one":[237],"according":[242],"different":[244],"situations":[245],"further":[247],"Experiments":[251],"on":[252],"range":[254],"challenging":[256],"continuous":[257],"control":[258],"benchmarks":[259],"demonstrate":[260],"outperforms":[264],"related":[265],"works":[266],"offers":[268],"satisfactory":[270],"trade-off":[271],"stability":[273]},"counts_by_year":[{"year":2022,"cited_by_count":1}],"updated_date":"2026-05-21T06:26:12.895304","created_date":"2025-10-10T00:00:00"}
