{"id":"https://openalex.org/W3088331655","doi":"https://doi.org/10.3390/a13090239","title":"Feasibility Analysis and Application of Reinforcement Learning Algorithm Based on Dynamic Parameter Adjustment","display_name":"Feasibility Analysis and Application of Reinforcement Learning Algorithm Based on Dynamic Parameter Adjustment","publication_year":2020,"publication_date":"2020-09-22","ids":{"openalex":"https://openalex.org/W3088331655","doi":"https://doi.org/10.3390/a13090239","mag":"3088331655"},"language":"en","primary_location":{"id":"doi:10.3390/a13090239","is_oa":true,"landing_page_url":"https://doi.org/10.3390/a13090239","pdf_url":"https://www.mdpi.com/1999-4893/13/9/239/pdf?version=1600756688","source":{"id":"https://openalex.org/S190629608","display_name":"Algorithms","issn_l":"1999-4893","issn":["1999-4893"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310310987","host_organization_name":"Multidisciplinary Digital Publishing Institute","host_organization_lineage":["https://openalex.org/P4310310987"],"host_organization_lineage_names":["Multidisciplinary Digital Publishing Institute"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Algorithms","raw_type":"journal-article"},"type":"article","indexed_in":["crossref","doaj"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://www.mdpi.com/1999-4893/13/9/239/pdf?version=1600756688","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5051107809","display_name":"Menglin Li","orcid":"https://orcid.org/0000-0003-3307-5490"},"institutions":[{"id":"https://openalex.org/I170215575","display_name":"National University of Defense Technology","ror":"https://ror.org/05d2yfz11","country_code":"CN","type":"education","lineage":["https://openalex.org/I170215575"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Menglin Li","raw_affiliation_strings":["College of Intelligence Science and Technology, National University of Defense Technology, Changsha 410073, China"],"affiliations":[{"raw_affiliation_string":"College of Intelligence Science and Technology, National University of Defense Technology, Changsha 410073, China","institution_ids":["https://openalex.org/I170215575"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100842782","display_name":"Xueqiang Gu","orcid":null},"institutions":[{"id":"https://openalex.org/I170215575","display_name":"National University of Defense Technology","ror":"https://ror.org/05d2yfz11","country_code":"CN","type":"education","lineage":["https://openalex.org/I170215575"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Xueqiang Gu","raw_affiliation_strings":["College of Intelligence Science and Technology, National University of Defense Technology, Changsha 410073, China"],"affiliations":[{"raw_affiliation_string":"College of Intelligence Science and Technology, National University of Defense Technology, Changsha 410073, China","institution_ids":["https://openalex.org/I170215575"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5072930058","display_name":"Chengyi Zeng","orcid":"https://orcid.org/0000-0001-7395-2537"},"institutions":[{"id":"https://openalex.org/I170215575","display_name":"National University of Defense Technology","ror":"https://ror.org/05d2yfz11","country_code":"CN","type":"education","lineage":["https://openalex.org/I170215575"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Chengyi Zeng","raw_affiliation_strings":["College of Intelligence Science and Technology, National University of Defense Technology, Changsha 410073, China"],"affiliations":[{"raw_affiliation_string":"College of Intelligence Science and Technology, National University of Defense Technology, Changsha 410073, China","institution_ids":["https://openalex.org/I170215575"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5022869475","display_name":"Yuan Feng","orcid":null},"institutions":[{"id":"https://openalex.org/I170215575","display_name":"National University of Defense Technology","ror":"https://ror.org/05d2yfz11","country_code":"CN","type":"education","lineage":["https://openalex.org/I170215575"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yuan Feng","raw_affiliation_strings":["College of Intelligence Science and Technology, National University of Defense Technology, Changsha 410073, China"],"affiliations":[{"raw_affiliation_string":"College of Intelligence Science and Technology, National University of Defense Technology, Changsha 410073, China","institution_ids":["https://openalex.org/I170215575"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5100842782"],"corresponding_institution_ids":["https://openalex.org/I170215575"],"apc_list":{"value":1400,"currency":"CHF","value_usd":1515},"apc_paid":{"value":1400,"currency":"CHF","value_usd":1515},"fwci":0.2887,"has_fulltext":false,"cited_by_count":4,"citation_normalized_percentile":{"value":0.63678253,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":99},"biblio":{"volume":"13","issue":"9","first_page":"239","last_page":"239"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12794","display_name":"Adaptive Dynamic Programming Control","score":0.9789999723434448,"subfield":{"id":"https://openalex.org/subfields/1703","display_name":"Computational Theory and Mathematics"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T12794","display_name":"Adaptive Dynamic Programming Control","score":0.9789999723434448,"subfield":{"id":"https://openalex.org/subfields/1703","display_name":"Computational Theory and Mathematics"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9758999943733215,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.9254471659660339},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7445012331008911},{"id":"https://openalex.org/keywords/temporal-difference-learning","display_name":"Temporal difference learning","score":0.6700031757354736},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.6168943643569946},{"id":"https://openalex.org/keywords/learning-classifier-system","display_name":"Learning classifier system","score":0.613894522190094},{"id":"https://openalex.org/keywords/unsupervised-learning","display_name":"Unsupervised learning","score":0.6054482460021973},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.6040805578231812},{"id":"https://openalex.org/keywords/instance-based-learning","display_name":"Instance-based learning","score":0.5348531007766724},{"id":"https://openalex.org/keywords/wake-sleep-algorithm","display_name":"Wake-sleep algorithm","score":0.5284687876701355},{"id":"https://openalex.org/keywords/q-learning","display_name":"Q-learning","score":0.518154501914978},{"id":"https://openalex.org/keywords/convergence","display_name":"Convergence (economics)","score":0.49181225895881653},{"id":"https://openalex.org/keywords/robot-learning","display_name":"Robot learning","score":0.4843910336494446},{"id":"https://openalex.org/keywords/online-machine-learning","display_name":"Online machine learning","score":0.4842073917388916},{"id":"https://openalex.org/keywords/active-learning","display_name":"Active learning (machine learning)","score":0.4670048952102661},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.37642791867256165},{"id":"https://openalex.org/keywords/generalization-error","display_name":"Generalization error","score":0.13751864433288574},{"id":"https://openalex.org/keywords/robot","display_name":"Robot","score":0.11725348234176636}],"concepts":[{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.9254471659660339},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7445012331008911},{"id":"https://openalex.org/C196340769","wikidata":"https://www.wikidata.org/wiki/Q7698910","display_name":"Temporal difference learning","level":3,"score":0.6700031757354736},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6168943643569946},{"id":"https://openalex.org/C199190896","wikidata":"https://www.wikidata.org/wiki/Q3509276","display_name":"Learning classifier system","level":3,"score":0.613894522190094},{"id":"https://openalex.org/C8038995","wikidata":"https://www.wikidata.org/wiki/Q1152135","display_name":"Unsupervised learning","level":2,"score":0.6054482460021973},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.6040805578231812},{"id":"https://openalex.org/C24138899","wikidata":"https://www.wikidata.org/wiki/Q17141258","display_name":"Instance-based learning","level":3,"score":0.5348531007766724},{"id":"https://openalex.org/C17061570","wikidata":"https://www.wikidata.org/wiki/Q7960888","display_name":"Wake-sleep algorithm","level":4,"score":0.5284687876701355},{"id":"https://openalex.org/C188116033","wikidata":"https://www.wikidata.org/wiki/Q2664563","display_name":"Q-learning","level":3,"score":0.518154501914978},{"id":"https://openalex.org/C2777303404","wikidata":"https://www.wikidata.org/wiki/Q759757","display_name":"Convergence (economics)","level":2,"score":0.49181225895881653},{"id":"https://openalex.org/C188888258","wikidata":"https://www.wikidata.org/wiki/Q7353390","display_name":"Robot learning","level":4,"score":0.4843910336494446},{"id":"https://openalex.org/C115903097","wikidata":"https://www.wikidata.org/wiki/Q7094097","display_name":"Online machine learning","level":3,"score":0.4842073917388916},{"id":"https://openalex.org/C77967617","wikidata":"https://www.wikidata.org/wiki/Q4677561","display_name":"Active learning (machine learning)","level":2,"score":0.4670048952102661},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.37642791867256165},{"id":"https://openalex.org/C117765406","wikidata":"https://www.wikidata.org/wiki/Q5362437","display_name":"Generalization error","level":3,"score":0.13751864433288574},{"id":"https://openalex.org/C90509273","wikidata":"https://www.wikidata.org/wiki/Q11012","display_name":"Robot","level":2,"score":0.11725348234176636},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0},{"id":"https://openalex.org/C19966478","wikidata":"https://www.wikidata.org/wiki/Q4810574","display_name":"Mobile robot","level":3,"score":0.0},{"id":"https://openalex.org/C50522688","wikidata":"https://www.wikidata.org/wiki/Q189833","display_name":"Economic growth","level":1,"score":0.0}],"mesh":[],"locations_count":3,"locations":[{"id":"doi:10.3390/a13090239","is_oa":true,"landing_page_url":"https://doi.org/10.3390/a13090239","pdf_url":"https://www.mdpi.com/1999-4893/13/9/239/pdf?version=1600756688","source":{"id":"https://openalex.org/S190629608","display_name":"Algorithms","issn_l":"1999-4893","issn":["1999-4893"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310310987","host_organization_name":"Multidisciplinary Digital Publishing Institute","host_organization_lineage":["https://openalex.org/P4310310987"],"host_organization_lineage_names":["Multidisciplinary Digital Publishing Institute"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Algorithms","raw_type":"journal-article"},{"id":"pmh:oai:doaj.org/article:05020d78803440c0b175a06d8e1ff900","is_oa":true,"landing_page_url":"https://doaj.org/article/05020d78803440c0b175a06d8e1ff900","pdf_url":null,"source":{"id":"https://openalex.org/S112646816","display_name":"SHILAP Revista de lepidopterolog\u00eda","issn_l":"0300-5267","issn":["0300-5267","2340-4078"],"is_oa":true,"is_in_doaj":true,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"journal"},"license":"cc-by-sa","license_id":"https://openalex.org/licenses/cc-by-sa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Algorithms, Vol 13, Iss 9, p 239 (2020)","raw_type":"article"},{"id":"pmh:oai:mdpi.com:/1999-4893/13/9/239/","is_oa":true,"landing_page_url":"https://dx.doi.org/10.3390/a13090239","pdf_url":null,"source":{"id":"https://openalex.org/S4306400947","display_name":"MDPI (MDPI AG)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I4210097602","host_organization_name":"Multidisciplinary Digital Publishing Institute (Switzerland)","host_organization_lineage":["https://openalex.org/I4210097602"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Algorithms; Volume 13; Issue 9; Pages: 239","raw_type":"Text"}],"best_oa_location":{"id":"doi:10.3390/a13090239","is_oa":true,"landing_page_url":"https://doi.org/10.3390/a13090239","pdf_url":"https://www.mdpi.com/1999-4893/13/9/239/pdf?version=1600756688","source":{"id":"https://openalex.org/S190629608","display_name":"Algorithms","issn_l":"1999-4893","issn":["1999-4893"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310310987","host_organization_name":"Multidisciplinary Digital Publishing Institute","host_organization_lineage":["https://openalex.org/P4310310987"],"host_organization_lineage_names":["Multidisciplinary Digital Publishing Institute"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Algorithms","raw_type":"journal-article"},"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G4666313010","display_name":null,"funder_award_id":"61603406","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"}],"has_content":{"pdf":true,"grobid_xml":true},"content_urls":{"pdf":"https://content.openalex.org/works/W3088331655.pdf","grobid_xml":"https://content.openalex.org/works/W3088331655.grobid-xml"},"referenced_works_count":37,"referenced_works":["https://openalex.org/W1757796397","https://openalex.org/W1876044947","https://openalex.org/W1966195676","https://openalex.org/W2073945238","https://openalex.org/W2091825929","https://openalex.org/W2097998348","https://openalex.org/W2101786389","https://openalex.org/W2107726111","https://openalex.org/W2108892923","https://openalex.org/W2121863487","https://openalex.org/W2134289401","https://openalex.org/W2145339207","https://openalex.org/W2167425257","https://openalex.org/W2271840356","https://openalex.org/W2586680856","https://openalex.org/W2610686804","https://openalex.org/W2805560727","https://openalex.org/W2913939497","https://openalex.org/W2919115771","https://openalex.org/W2921010761","https://openalex.org/W2949369413","https://openalex.org/W2951884559","https://openalex.org/W2954882791","https://openalex.org/W2970971581","https://openalex.org/W3011120880","https://openalex.org/W3103589371","https://openalex.org/W3127157414","https://openalex.org/W4214717370","https://openalex.org/W4229482599","https://openalex.org/W4231226883","https://openalex.org/W4233840023","https://openalex.org/W4285719527","https://openalex.org/W6639145154","https://openalex.org/W6674385629","https://openalex.org/W6737210826","https://openalex.org/W6775686901","https://openalex.org/W6786437668"],"related_works":["https://openalex.org/W4205569898","https://openalex.org/W4321844043","https://openalex.org/W3196155444","https://openalex.org/W4386979406","https://openalex.org/W3088331655","https://openalex.org/W2610686804","https://openalex.org/W3033412096","https://openalex.org/W2402596571","https://openalex.org/W2189207617","https://openalex.org/W2962720233"],"abstract_inverted_index":{"Reinforcement":[0],"learning,":[1,7,117,229,259],"as":[2],"a":[3,95,260],"branch":[4],"of":[5,21,39,52,65,76,82,103,112,123,130,151,162,189,200,210,225,227,255,269,282,298,306],"machine":[6,41],"has":[8,285],"been":[9,286],"gradually":[10],"applied":[11],"in":[12,17,62,139,155,165,220,247,289,292],"the":[13,18,22,24,36,53,59,63,66,74,77,109,121,136,142,149,160,163,169,174,187,197,201,206,208,216,221,235,243,252,256,266,270,277,280,283,293,296,300],"control":[14,203,218],"field.":[15],"However,":[16],"practical":[19,166,248],"application":[20],"algorithm,":[23,182],"hyperparametric":[25],"approach":[26],"to":[27,147,158,251],"network":[28,274],"settings":[29],"for":[30,98,115,265],"deep":[31,104,179,257,271],"reinforcement":[32,67,83,86,105,116,131,180,228,245,258,272],"learning":[33,42,44,68,87,100,125,132,137,153,181,192,246,267,273,301],"still":[34],"follows":[35],"empirical":[37],"attempts":[38],"traditional":[40,110,244],"(supervised":[43],"and":[45,79,133,178,230,291,309],"unsupervised":[46],"learning).":[47],"This":[48],"method":[49,97,111,119,212,237,264,284,297],"ignores":[50],"part":[51],"information":[54],"generated":[55],"by":[56,172],"agents":[57],"exploring":[58],"environment":[60,224],"contained":[61],"updating":[64],"value":[69],"function,":[70],"which":[71],"will":[72],"affect":[73],"performance":[75],"convergence":[78,198],"cumulative":[80],"return":[81],"learning.":[84,106],"The":[85],"algorithm":[88,164,177,188],"based":[89],"on":[90,108],"dynamic":[91,190],"parameter":[92,303],"adjustment":[93],"is":[94,184,213,232,304],"new":[96,236],"setting":[99,113,263,299],"rate":[101,193,268,302],"parameters":[102,114],"Based":[107],"this":[118,211],"analyzes":[120],"advantages":[122,150],"different":[124,128,152,156],"rates":[126,138,154],"at":[127],"stages":[129,157],"dynamically":[134],"adjusts":[135],"combination":[140],"with":[141],"temporal-difference":[143],"(TD)":[144],"error":[145],"values":[146],"achieve":[148,239],"improve":[159],"rationality":[161],"application.":[167,249,294],"At":[168,276],"same":[170,278],"time,":[171,279],"combining":[173],"Robbins\u2013Monro":[175],"approximation":[176],"it":[183,231],"proved":[185,287],"that":[186,234],"regulation":[191],"can":[194,238],"theoretically":[195],"meet":[196],"requirements":[199],"intelligent":[202],"algorithm.":[204],"In":[205],"experiment,":[207],"effect":[209],"analyzed":[214],"through":[215],"continuous":[217],"scenario":[219],"standard":[222],"experimental":[223],"\u201dCar-on-The-Hill\u201d":[226],"verified":[233],"better":[240],"results":[241],"than":[242],"According":[250],"model":[253],"characteristics":[254],"more":[261],"suitable":[262],"proposed.":[275],"feasibility":[281],"both":[288],"theory":[290],"Therefore,":[295],"worthy":[305],"further":[307],"development":[308],"research.":[310]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2024,"cited_by_count":1},{"year":2022,"cited_by_count":1},{"year":2020,"cited_by_count":1}],"updated_date":"2026-03-27T14:29:43.386196","created_date":"2025-10-10T00:00:00"}
