{"id":"https://openalex.org/W3115077346","doi":"https://doi.org/10.1109/access.2020.3045835","title":"Policy Return: A New Method for Reducing the Number of Experimental Trials in Deep Reinforcement Learning","display_name":"Policy Return: A New Method for Reducing the Number of Experimental Trials in Deep Reinforcement Learning","publication_year":2020,"publication_date":"2020-01-01","ids":{"openalex":"https://openalex.org/W3115077346","doi":"https://doi.org/10.1109/access.2020.3045835","mag":"3115077346"},"language":"en","primary_location":{"id":"doi:10.1109/access.2020.3045835","is_oa":true,"landing_page_url":"https://doi.org/10.1109/access.2020.3045835","pdf_url":"https://ieeexplore.ieee.org/ielx7/6287639/6514899/09298771.pdf","source":{"id":"https://openalex.org/S2485537415","display_name":"IEEE Access","issn_l":"2169-3536","issn":["2169-3536"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Access","raw_type":"journal-article"},"type":"article","indexed_in":["crossref","doaj"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://ieeexplore.ieee.org/ielx7/6287639/6514899/09298771.pdf","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5100415312","display_name":"Feng Liu","orcid":"https://orcid.org/0000-0002-9006-4520"},"institutions":[{"id":"https://openalex.org/I82880672","display_name":"Beihang University","ror":"https://ror.org/00wk2mp56","country_code":"CN","type":"education","lineage":["https://openalex.org/I82880672"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Feng Liu","raw_affiliation_strings":["State Key Laboratory of VR Technology & Systems, Beihang University (BUAA), Beijing, China"],"raw_orcid":"https://orcid.org/0000-0002-9006-4520","affiliations":[{"raw_affiliation_string":"State Key Laboratory of VR Technology & Systems, Beihang University (BUAA), Beijing, China","institution_ids":["https://openalex.org/I82880672"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5102866500","display_name":"Shuling Dai","orcid":"https://orcid.org/0000-0002-2934-9033"},"institutions":[{"id":"https://openalex.org/I82880672","display_name":"Beihang University","ror":"https://ror.org/00wk2mp56","country_code":"CN","type":"education","lineage":["https://openalex.org/I82880672"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Shuling Dai","raw_affiliation_strings":["State Key Laboratory of VR Technology & Systems, Beihang University (BUAA), Beijing, China","Jiangxi Research Institute, Beihang University (BUAA), Beijing, China"],"raw_orcid":"https://orcid.org/0000-0002-2934-9033","affiliations":[{"raw_affiliation_string":"State Key Laboratory of VR Technology & Systems, Beihang University (BUAA), Beijing, China","institution_ids":["https://openalex.org/I82880672"]},{"raw_affiliation_string":"Jiangxi Research Institute, Beihang University (BUAA), Beijing, China","institution_ids":["https://openalex.org/I82880672"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5082426267","display_name":"Yongjia Zhao","orcid":"https://orcid.org/0000-0002-4557-9066"},"institutions":[{"id":"https://openalex.org/I82880672","display_name":"Beihang University","ror":"https://ror.org/00wk2mp56","country_code":"CN","type":"education","lineage":["https://openalex.org/I82880672"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yongjia Zhao","raw_affiliation_strings":["State Key Laboratory of VR Technology & Systems, Beihang University (BUAA), Beijing, China"],"raw_orcid":"https://orcid.org/0000-0002-4557-9066","affiliations":[{"raw_affiliation_string":"State Key Laboratory of VR Technology & Systems, Beihang University (BUAA), Beijing, China","institution_ids":["https://openalex.org/I82880672"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5100415312"],"corresponding_institution_ids":["https://openalex.org/I82880672"],"apc_list":{"value":1850,"currency":"USD","value_usd":1850},"apc_paid":{"value":1850,"currency":"USD","value_usd":1850},"fwci":0.136,"has_fulltext":true,"cited_by_count":1,"citation_normalized_percentile":{"value":0.58709355,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":94},"biblio":{"volume":"8","issue":null,"first_page":"228099","last_page":"228107"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11975","display_name":"Evolutionary Algorithms and Applications","score":0.9934999942779541,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10848","display_name":"Advanced Multi-Objective Optimization Algorithms","score":0.9883999824523926,"subfield":{"id":"https://openalex.org/subfields/1703","display_name":"Computational Theory and Mathematics"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/hyperparameter","display_name":"Hyperparameter","score":0.8586024045944214},{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.8186781406402588},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6882305145263672},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5724294185638428},{"id":"https://openalex.org/keywords/reduction","display_name":"Reduction (mathematics)","score":0.5391542911529541},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.5184952020645142},{"id":"https://openalex.org/keywords/set","display_name":"Set (abstract data type)","score":0.4787415862083435},{"id":"https://openalex.org/keywords/artificial-neural-network","display_name":"Artificial neural network","score":0.4391811788082123},{"id":"https://openalex.org/keywords/stability","display_name":"Stability (learning theory)","score":0.41667819023132324},{"id":"https://openalex.org/keywords/mathematical-optimization","display_name":"Mathematical optimization","score":0.32939931750297546},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.18197974562644958}],"concepts":[{"id":"https://openalex.org/C8642999","wikidata":"https://www.wikidata.org/wiki/Q4171168","display_name":"Hyperparameter","level":2,"score":0.8586024045944214},{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.8186781406402588},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6882305145263672},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5724294185638428},{"id":"https://openalex.org/C111335779","wikidata":"https://www.wikidata.org/wiki/Q3454686","display_name":"Reduction (mathematics)","level":2,"score":0.5391542911529541},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.5184952020645142},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.4787415862083435},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.4391811788082123},{"id":"https://openalex.org/C112972136","wikidata":"https://www.wikidata.org/wiki/Q7595718","display_name":"Stability (learning theory)","level":2,"score":0.41667819023132324},{"id":"https://openalex.org/C126255220","wikidata":"https://www.wikidata.org/wiki/Q141495","display_name":"Mathematical optimization","level":1,"score":0.32939931750297546},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.18197974562644958},{"id":"https://openalex.org/C2524010","wikidata":"https://www.wikidata.org/wiki/Q8087","display_name":"Geometry","level":1,"score":0.0},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1109/access.2020.3045835","is_oa":true,"landing_page_url":"https://doi.org/10.1109/access.2020.3045835","pdf_url":"https://ieeexplore.ieee.org/ielx7/6287639/6514899/09298771.pdf","source":{"id":"https://openalex.org/S2485537415","display_name":"IEEE Access","issn_l":"2169-3536","issn":["2169-3536"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Access","raw_type":"journal-article"},{"id":"pmh:oai:doaj.org/article:ce74b71c5e014fbbac96e1e91fa156c3","is_oa":true,"landing_page_url":"https://doaj.org/article/ce74b71c5e014fbbac96e1e91fa156c3","pdf_url":null,"source":{"id":"https://openalex.org/S4306401280","display_name":"DOAJ (DOAJ: Directory of Open Access Journals)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by-sa","license_id":"https://openalex.org/licenses/cc-by-sa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"IEEE Access, Vol 8, Pp 228099-228107 (2020)","raw_type":"article"}],"best_oa_location":{"id":"doi:10.1109/access.2020.3045835","is_oa":true,"landing_page_url":"https://doi.org/10.1109/access.2020.3045835","pdf_url":"https://ieeexplore.ieee.org/ielx7/6287639/6514899/09298771.pdf","source":{"id":"https://openalex.org/S2485537415","display_name":"IEEE Access","issn_l":"2169-3536","issn":["2169-3536"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Access","raw_type":"journal-article"},"sustainable_development_goals":[{"display_name":"Peace, Justice and strong institutions","score":0.49000000953674316,"id":"https://metadata.un.org/sdg/16"}],"awards":[],"funders":[],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W3115077346.pdf","grobid_xml":"https://content.openalex.org/works/W3115077346.grobid-xml"},"referenced_works_count":53,"referenced_works":["https://openalex.org/W32403112","https://openalex.org/W1515851193","https://openalex.org/W1522301498","https://openalex.org/W1757796397","https://openalex.org/W1771410628","https://openalex.org/W2121863487","https://openalex.org/W2142441581","https://openalex.org/W2145339207","https://openalex.org/W2155007355","https://openalex.org/W2156737235","https://openalex.org/W2173248099","https://openalex.org/W2201581102","https://openalex.org/W2257979135","https://openalex.org/W2546571074","https://openalex.org/W2575705757","https://openalex.org/W2583993537","https://openalex.org/W2724169821","https://openalex.org/W2726187156","https://openalex.org/W2736601468","https://openalex.org/W2754517384","https://openalex.org/W2761873684","https://openalex.org/W2766447205","https://openalex.org/W2809256243","https://openalex.org/W2902098903","https://openalex.org/W2960855687","https://openalex.org/W2963423916","https://openalex.org/W2963428623","https://openalex.org/W2963864421","https://openalex.org/W2964043796","https://openalex.org/W2964121744","https://openalex.org/W2964161785","https://openalex.org/W2964174623","https://openalex.org/W3006186780","https://openalex.org/W3100789280","https://openalex.org/W3100944043","https://openalex.org/W4250743307","https://openalex.org/W4298857966","https://openalex.org/W4298876402","https://openalex.org/W6631190155","https://openalex.org/W6637967152","https://openalex.org/W6638018090","https://openalex.org/W6682849425","https://openalex.org/W6683195989","https://openalex.org/W6683300800","https://openalex.org/W6684921986","https://openalex.org/W6687681856","https://openalex.org/W6692846177","https://openalex.org/W6740092555","https://openalex.org/W6740222838","https://openalex.org/W6741002519","https://openalex.org/W6744123322","https://openalex.org/W6744838376","https://openalex.org/W6752603789"],"related_works":["https://openalex.org/W2140186469","https://openalex.org/W4390421286","https://openalex.org/W4280563792","https://openalex.org/W4389724018","https://openalex.org/W4318719684","https://openalex.org/W4318559728","https://openalex.org/W3183136280","https://openalex.org/W2775233965","https://openalex.org/W3114716045","https://openalex.org/W4281847915"],"abstract_inverted_index":{"Using":[0],"the":[1,29,32,56,67,79,110,113,132,145,154,166],"same":[2],"algorithm":[3,43],"and":[4,20,126,158],"hyperparameter":[5],"configurations,":[6],"deep":[7],"reinforcement":[8],"learning":[9],"(DRL)":[10],"will":[11],"derive":[12],"drastically":[13],"different":[14],"results":[15,24],"from":[16],"multiple":[17],"experimental":[18],"trials,":[19],"most":[21],"of":[22,28,31,47,69,97,104,112,148,153],"these":[23],"are":[25],"unsatisfactory.":[26],"Because":[27],"instability":[30],"results,":[33],"researchers":[34],"have":[35],"to":[36,40,82,84,109,116,141,161],"perform":[37],"many":[38],"trials":[39,70,149],"confirm":[41],"an":[42],"or":[44,92],"a":[45,62,73,85,101,118,139,159],"set":[46],"hyperparameters":[48],"in":[49,144],"DRL.":[50],"In":[51],"this":[52],"article,":[53],"we":[54],"present":[55],"policy":[57,81,133],"return":[58,83,134],"method,":[59],"which":[60],"is":[61,107],"new":[63],"design":[64],"for":[65],"reducing":[66],"number":[68,147],"when":[71,88],"training":[72],"DRL":[74],"model.":[75],"This":[76],"method":[77,135],"allows":[78],"learned":[80],"previous":[86],"state":[87],"it":[89],"becomes":[90],"divergent":[91],"stagnant":[93],"at":[94],"any":[95],"stage":[96],"training.":[98],"When":[99],"returning,":[100],"certain":[102],"percentage":[103],"stochastic":[105],"data":[106],"added":[108],"weights":[111],"neural":[114],"networks":[115],"prevent":[117],"repeated":[119],"decline.":[120],"Extensive":[121],"experiments":[122],"on":[123],"challenging":[124],"tasks":[125],"various":[127],"target":[128],"scores":[129],"demonstrate":[130],"that":[131,152],"can":[136],"bring":[137],"about":[138],"10%":[140,160],"40%":[142],"reduction":[143,163],"required":[146],"compared":[150,164],"with":[151,165],"corresponding":[155],"original":[156],"algorithm,":[157],"30%":[162],"state-of-the-art":[167],"algorithms.":[168]},"counts_by_year":[{"year":2023,"cited_by_count":1}],"updated_date":"2026-05-06T08:25:59.206177","created_date":"2025-10-10T00:00:00"}
