{"id":"https://openalex.org/W3176433904","doi":"https://doi.org/10.1142/s1469026821500115","title":"A Novel Adaptive Sampling Strategy for Deep Reinforcement Learning","display_name":"A Novel Adaptive Sampling Strategy for Deep Reinforcement Learning","publication_year":2021,"publication_date":"2021-06-01","ids":{"openalex":"https://openalex.org/W3176433904","doi":"https://doi.org/10.1142/s1469026821500115","mag":"3176433904"},"language":"en","primary_location":{"id":"doi:10.1142/s1469026821500115","is_oa":false,"landing_page_url":"https://doi.org/10.1142/s1469026821500115","pdf_url":null,"source":{"id":"https://openalex.org/S206936884","display_name":"International Journal of Computational Intelligence and Applications","issn_l":"1469-0268","issn":["1469-0268","1757-5885"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310311754","host_organization_name":"Imperial College Press","host_organization_lineage":["https://openalex.org/P4310311754"],"host_organization_lineage_names":["Imperial College Press"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"International Journal of Computational Intelligence and Applications","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5080249769","display_name":"Xingxing Liang","orcid":"https://orcid.org/0000-0002-3594-2167"},"institutions":[{"id":"https://openalex.org/I170215575","display_name":"National University of Defense Technology","ror":"https://ror.org/05d2yfz11","country_code":"CN","type":"education","lineage":["https://openalex.org/I170215575"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xingxing Liang","raw_affiliation_strings":["National University of Defense Technology, College of Systems Engineering, Changsha, P. R. China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"National University of Defense Technology, College of Systems Engineering, Changsha, P. R. China","institution_ids":["https://openalex.org/I170215575"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100379250","display_name":"Li Chen","orcid":"https://orcid.org/0000-0002-4761-5913"},"institutions":[{"id":"https://openalex.org/I170215575","display_name":"National University of Defense Technology","ror":"https://ror.org/05d2yfz11","country_code":"CN","type":"education","lineage":["https://openalex.org/I170215575"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Li Chen","raw_affiliation_strings":["National University of Defense Technology, College of Systems Engineering, Changsha, P. R. China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"National University of Defense Technology, College of Systems Engineering, Changsha, P. R. China","institution_ids":["https://openalex.org/I170215575"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100731174","display_name":"Yanghe Feng","orcid":"https://orcid.org/0000-0002-1039-9735"},"institutions":[{"id":"https://openalex.org/I170215575","display_name":"National University of Defense Technology","ror":"https://ror.org/05d2yfz11","country_code":"CN","type":"education","lineage":["https://openalex.org/I170215575"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Yanghe Feng","raw_affiliation_strings":["National University of Defense Technology, College of Systems Engineering, Changsha, P. R. China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"National University of Defense Technology, College of Systems Engineering, Changsha, P. R. China","institution_ids":["https://openalex.org/I170215575"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5115588589","display_name":"Zhong Liu","orcid":null},"institutions":[{"id":"https://openalex.org/I170215575","display_name":"National University of Defense Technology","ror":"https://ror.org/05d2yfz11","country_code":"CN","type":"education","lineage":["https://openalex.org/I170215575"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Zhong Liu","raw_affiliation_strings":["National University of Defense Technology, College of Systems Engineering, Changsha, P. R. China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"National University of Defense Technology, College of Systems Engineering, Changsha, P. R. China","institution_ids":["https://openalex.org/I170215575"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5069710974","display_name":"Yang Ma","orcid":"https://orcid.org/0000-0001-7067-5423"},"institutions":[{"id":"https://openalex.org/I170215575","display_name":"National University of Defense Technology","ror":"https://ror.org/05d2yfz11","country_code":"CN","type":"education","lineage":["https://openalex.org/I170215575"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yang Ma","raw_affiliation_strings":["National University of Defense Technology, College of Systems Engineering, Changsha, P. R. China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"National University of Defense Technology, College of Systems Engineering, Changsha, P. R. China","institution_ids":["https://openalex.org/I170215575"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5070928752","display_name":"Kuihua Huang","orcid":"https://orcid.org/0000-0001-6365-275X"},"institutions":[{"id":"https://openalex.org/I170215575","display_name":"National University of Defense Technology","ror":"https://ror.org/05d2yfz11","country_code":"CN","type":"education","lineage":["https://openalex.org/I170215575"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Kuihua Huang","raw_affiliation_strings":["National University of Defense Technology, College of Systems Engineering, Changsha, P. R. China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"National University of Defense Technology, College of Systems Engineering, Changsha, P. R. China","institution_ids":["https://openalex.org/I170215575"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5100731174"],"corresponding_institution_ids":["https://openalex.org/I170215575"],"apc_list":null,"apc_paid":null,"fwci":0.4197,"has_fulltext":false,"cited_by_count":5,"citation_normalized_percentile":{"value":0.68929378,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":98},"biblio":{"volume":"20","issue":"02","first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12794","display_name":"Adaptive Dynamic Programming Control","score":0.9700999855995178,"subfield":{"id":"https://openalex.org/subfields/1703","display_name":"Computational Theory and Mathematics"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11975","display_name":"Evolutionary Algorithms and Applications","score":0.9682999849319458,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.9172194004058838},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.895776093006134},{"id":"https://openalex.org/keywords/sample","display_name":"Sample (material)","score":0.5728994011878967},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.565886378288269},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.4659838080406189},{"id":"https://openalex.org/keywords/reuse","display_name":"Reuse","score":0.44685959815979004},{"id":"https://openalex.org/keywords/sampling","display_name":"Sampling (signal processing)","score":0.44178706407546997},{"id":"https://openalex.org/keywords/set","display_name":"Set (abstract data type)","score":0.43612948060035706}],"concepts":[{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.9172194004058838},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.895776093006134},{"id":"https://openalex.org/C198531522","wikidata":"https://www.wikidata.org/wiki/Q485146","display_name":"Sample (material)","level":2,"score":0.5728994011878967},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.565886378288269},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.4659838080406189},{"id":"https://openalex.org/C206588197","wikidata":"https://www.wikidata.org/wiki/Q846574","display_name":"Reuse","level":2,"score":0.44685959815979004},{"id":"https://openalex.org/C140779682","wikidata":"https://www.wikidata.org/wiki/Q210868","display_name":"Sampling (signal processing)","level":3,"score":0.44178706407546997},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.43612948060035706},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.0},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0},{"id":"https://openalex.org/C106131492","wikidata":"https://www.wikidata.org/wiki/Q3072260","display_name":"Filter (signal processing)","level":2,"score":0.0},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0},{"id":"https://openalex.org/C43617362","wikidata":"https://www.wikidata.org/wiki/Q170050","display_name":"Chromatography","level":1,"score":0.0},{"id":"https://openalex.org/C18903297","wikidata":"https://www.wikidata.org/wiki/Q7150","display_name":"Ecology","level":1,"score":0.0},{"id":"https://openalex.org/C185592680","wikidata":"https://www.wikidata.org/wiki/Q2329","display_name":"Chemistry","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1142/s1469026821500115","is_oa":false,"landing_page_url":"https://doi.org/10.1142/s1469026821500115","pdf_url":null,"source":{"id":"https://openalex.org/S206936884","display_name":"International Journal of Computational Intelligence and Applications","issn_l":"1469-0268","issn":["1469-0268","1757-5885"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310311754","host_organization_name":"Imperial College Press","host_organization_lineage":["https://openalex.org/P4310311754"],"host_organization_lineage_names":["Imperial College Press"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"International Journal of Computational Intelligence and Applications","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[{"score":0.699999988079071,"id":"https://metadata.un.org/sdg/16","display_name":"Peace, Justice and strong institutions"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":12,"referenced_works":["https://openalex.org/W41554520","https://openalex.org/W1555689267","https://openalex.org/W1989858079","https://openalex.org/W2092710777","https://openalex.org/W2096145798","https://openalex.org/W2108892923","https://openalex.org/W2145339207","https://openalex.org/W2257979135","https://openalex.org/W2761873684","https://openalex.org/W2766447205","https://openalex.org/W2788862220","https://openalex.org/W4245108548"],"related_works":["https://openalex.org/W2000444236","https://openalex.org/W2384475851","https://openalex.org/W4362501864","https://openalex.org/W4306904969","https://openalex.org/W4380318855","https://openalex.org/W2138720691","https://openalex.org/W2031695474","https://openalex.org/W2353602216","https://openalex.org/W2042102171","https://openalex.org/W4251223544"],"abstract_inverted_index":{"Reinforcement":[0],"learning,":[1],"as":[2,19],"an":[3,13,98],"effective":[4],"method":[5],"to":[6,36,48,70,76,85,114,133,177],"solve":[7],"complex":[8],"sequential":[9],"decision-making":[10,21],"problems,":[11],"plays":[12],"important":[14],"role":[15],"in":[16,65,148,190],"areas":[17],"such":[18],"intelligent":[20],"and":[22,80,87,119,129,153,161,184,193],"behavioral":[23],"cognition.":[24],"It":[25],"is":[26],"well":[27],"known":[28],"that":[29],"the":[30,37,50,55,62,66,71,124,141,145,149,162,167,181,188,197],"sample":[31,63,67,107],"experience":[32,58],"replay":[33,59],"mechanism":[34,60],"contributes":[35],"development":[38],"of":[39,52,116,151,156,169,196],"current":[40],"deep":[41],"reinforcement":[42,90],"learning":[43,91],"by":[44,109,180],"reusing":[45],"past":[46],"samples":[47,115],"improve":[49,134],"efficiency":[51],"samples.":[53],"However,":[54],"existing":[56],"priority":[57],"changes":[61],"distribution":[64],"set":[68],"due":[69],"higher":[72],"sampling":[73],"frequency":[74],"assigned":[75],"a":[77],"specific":[78],"transition,":[79],"it":[81,121],"cannot":[82],"be":[83],"applied":[84],"actor-critic":[86],"other":[88],"on-policy":[89],"algorithm.":[92],"To":[93],"address":[94],"this,":[95],"we":[96,138],"propose":[97],"adaptive":[99],"factor":[100],"based":[101],"on":[102,166],"TD-error,":[103,118],"which":[104],"further":[105],"increases":[106],"utilization":[108],"giving":[110],"more":[111],"attention":[112],"weight":[113],"larger":[117],"embeds":[120],"flexibly":[122],"into":[123],"original":[125,185],"Deep":[126],"Q":[127],"Network":[128],"Advantage":[130],"Actor-Critic":[131],"algorithm":[132],"their":[135],"performance.":[136],"Then":[137],"carried":[139],"out":[140],"performance":[142],"evaluation":[143],"for":[144],"proposed":[146],"architecture":[147],"context":[150],"CartPole-V1":[152],"6":[154],"environments":[155],"Atari":[157],"game":[158],"experiments,":[159],"respectively,":[160],"obtained":[163],"results":[164],"either":[165],"conditions":[168],"fixed":[170],"temperature":[171],"or":[172],"annealing":[173],"temperature,":[174],"when":[175],"compared":[176],"those":[178],"produced":[179],"vanilla":[182],"DQN":[183],"A2C,":[186],"highlight":[187],"advantages":[189],"cumulative":[191],"rewards":[192],"climb":[194],"speed":[195],"improved":[198],"algorithms.":[199]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":1},{"year":2024,"cited_by_count":1},{"year":2023,"cited_by_count":1},{"year":2022,"cited_by_count":1}],"updated_date":"2026-05-21T06:26:12.895304","created_date":"2025-10-10T00:00:00"}
