{"id":"https://openalex.org/W3042840230","doi":"https://doi.org/10.1109/access.2020.3009329","title":"Sample Efficient Reinforcement Learning Method via High Efficient Episodic Memory","display_name":"Sample Efficient Reinforcement Learning Method via High Efficient Episodic Memory","publication_year":2020,"publication_date":"2020-01-01","ids":{"openalex":"https://openalex.org/W3042840230","doi":"https://doi.org/10.1109/access.2020.3009329","mag":"3042840230"},"language":"en","primary_location":{"id":"doi:10.1109/access.2020.3009329","is_oa":true,"landing_page_url":"https://doi.org/10.1109/access.2020.3009329","pdf_url":null,"source":{"id":"https://openalex.org/S2485537415","display_name":"IEEE Access","issn_l":"2169-3536","issn":["2169-3536"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Access","raw_type":"journal-article"},"type":"article","indexed_in":["crossref","doaj"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://doi.org/10.1109/access.2020.3009329","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5015814760","display_name":"Dujia Yang","orcid":"https://orcid.org/0000-0002-9309-9067"},"institutions":[{"id":"https://openalex.org/I126520041","display_name":"University of Science and Technology of China","ror":"https://ror.org/04c4dkn09","country_code":"CN","type":"education","lineage":["https://openalex.org/I126520041","https://openalex.org/I19820366"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Dujia Yang","raw_affiliation_strings":["CAS Key Laboratory of Wireless-Optical Communications, University of Science and Technology of China, Hefei, China"],"affiliations":[{"raw_affiliation_string":"CAS Key Laboratory of Wireless-Optical Communications, University of Science and Technology of China, Hefei, China","institution_ids":["https://openalex.org/I126520041"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5089544297","display_name":"Xiaowei Qin","orcid":"https://orcid.org/0000-0001-7296-3461"},"institutions":[{"id":"https://openalex.org/I126520041","display_name":"University of Science and Technology of China","ror":"https://ror.org/04c4dkn09","country_code":"CN","type":"education","lineage":["https://openalex.org/I126520041","https://openalex.org/I19820366"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xiaowei Qin","raw_affiliation_strings":["CAS Key Laboratory of Wireless-Optical Communications, University of Science and Technology of China, Hefei, China"],"affiliations":[{"raw_affiliation_string":"CAS Key Laboratory of Wireless-Optical Communications, University of Science and Technology of China, Hefei, China","institution_ids":["https://openalex.org/I126520041"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101815988","display_name":"Xiaodong Xu","orcid":"https://orcid.org/0000-0001-9041-3826"},"institutions":[{"id":"https://openalex.org/I126520041","display_name":"University of Science and Technology of China","ror":"https://ror.org/04c4dkn09","country_code":"CN","type":"education","lineage":["https://openalex.org/I126520041","https://openalex.org/I19820366"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xiaodong Xu","raw_affiliation_strings":["CAS Key Laboratory of Wireless-Optical Communications, University of Science and Technology of China, Hefei, China"],"affiliations":[{"raw_affiliation_string":"CAS Key Laboratory of Wireless-Optical Communications, University of Science and Technology of China, Hefei, China","institution_ids":["https://openalex.org/I126520041"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5037554891","display_name":"Chensheng Li","orcid":"https://orcid.org/0000-0002-3500-754X"},"institutions":[{"id":"https://openalex.org/I126520041","display_name":"University of Science and Technology of China","ror":"https://ror.org/04c4dkn09","country_code":"CN","type":"education","lineage":["https://openalex.org/I126520041","https://openalex.org/I19820366"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Chensheng Li","raw_affiliation_strings":["CAS Key Laboratory of Wireless-Optical Communications, University of Science and Technology of China, Hefei, China"],"affiliations":[{"raw_affiliation_string":"CAS Key Laboratory of Wireless-Optical Communications, University of Science and Technology of China, Hefei, China","institution_ids":["https://openalex.org/I126520041"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5101398113","display_name":"Guo Wei","orcid":"https://orcid.org/0000-0001-9888-2408"},"institutions":[{"id":"https://openalex.org/I126520041","display_name":"University of Science and Technology of China","ror":"https://ror.org/04c4dkn09","country_code":"CN","type":"education","lineage":["https://openalex.org/I126520041","https://openalex.org/I19820366"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Guo Wei","raw_affiliation_strings":["CAS Key Laboratory of Wireless-Optical Communications, University of Science and Technology of China, Hefei, China"],"affiliations":[{"raw_affiliation_string":"CAS Key Laboratory of Wireless-Optical Communications, University of Science and Technology of China, Hefei, China","institution_ids":["https://openalex.org/I126520041"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5015814760"],"corresponding_institution_ids":["https://openalex.org/I126520041"],"apc_list":{"value":1850,"currency":"USD","value_usd":1850},"apc_paid":{"value":1850,"currency":"USD","value_usd":1850},"fwci":1.6462,"has_fulltext":false,"cited_by_count":36,"citation_normalized_percentile":{"value":0.87307412,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":100},"biblio":{"volume":"8","issue":null,"first_page":"129274","last_page":"129284"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9995999932289124,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9995999932289124,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11975","display_name":"Evolutionary Algorithms and Applications","score":0.9702000021934509,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12794","display_name":"Adaptive Dynamic Programming Control","score":0.9326000213623047,"subfield":{"id":"https://openalex.org/subfields/1703","display_name":"Computational Theory and Mathematics"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8168603181838989},{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.8136024475097656},{"id":"https://openalex.org/keywords/episodic-memory","display_name":"Episodic memory","score":0.8046911358833313},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5498219132423401},{"id":"https://openalex.org/keywords/sample","display_name":"Sample (material)","score":0.46152037382125854},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.3606812357902527},{"id":"https://openalex.org/keywords/cognition","display_name":"Cognition","score":0.12960383296012878},{"id":"https://openalex.org/keywords/psychology","display_name":"Psychology","score":0.07471734285354614}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8168603181838989},{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.8136024475097656},{"id":"https://openalex.org/C88576662","wikidata":"https://www.wikidata.org/wiki/Q18646","display_name":"Episodic memory","level":3,"score":0.8046911358833313},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5498219132423401},{"id":"https://openalex.org/C198531522","wikidata":"https://www.wikidata.org/wiki/Q485146","display_name":"Sample (material)","level":2,"score":0.46152037382125854},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.3606812357902527},{"id":"https://openalex.org/C169900460","wikidata":"https://www.wikidata.org/wiki/Q2200417","display_name":"Cognition","level":2,"score":0.12960383296012878},{"id":"https://openalex.org/C15744967","wikidata":"https://www.wikidata.org/wiki/Q9418","display_name":"Psychology","level":0,"score":0.07471734285354614},{"id":"https://openalex.org/C169760540","wikidata":"https://www.wikidata.org/wiki/Q207011","display_name":"Neuroscience","level":1,"score":0.0},{"id":"https://openalex.org/C185592680","wikidata":"https://www.wikidata.org/wiki/Q2329","display_name":"Chemistry","level":0,"score":0.0},{"id":"https://openalex.org/C43617362","wikidata":"https://www.wikidata.org/wiki/Q170050","display_name":"Chromatography","level":1,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1109/access.2020.3009329","is_oa":true,"landing_page_url":"https://doi.org/10.1109/access.2020.3009329","pdf_url":null,"source":{"id":"https://openalex.org/S2485537415","display_name":"IEEE Access","issn_l":"2169-3536","issn":["2169-3536"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Access","raw_type":"journal-article"},{"id":"pmh:oai:doaj.org/article:fb6d50b1f2ee439d82936f96de7f64d0","is_oa":true,"landing_page_url":"https://doaj.org/article/fb6d50b1f2ee439d82936f96de7f64d0","pdf_url":null,"source":{"id":"https://openalex.org/S112646816","display_name":"SHILAP Revista de lepidopterolog\u00eda","issn_l":"0300-5267","issn":["0300-5267","2340-4078"],"is_oa":true,"is_in_doaj":true,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"journal"},"license":"cc-by-sa","license_id":"https://openalex.org/licenses/cc-by-sa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"IEEE Access, Vol 8, Pp 129274-129284 (2020)","raw_type":"article"}],"best_oa_location":{"id":"doi:10.1109/access.2020.3009329","is_oa":true,"landing_page_url":"https://doi.org/10.1109/access.2020.3009329","pdf_url":null,"source":{"id":"https://openalex.org/S2485537415","display_name":"IEEE Access","issn_l":"2169-3536","issn":["2169-3536"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Access","raw_type":"journal-article"},"sustainable_development_goals":[{"score":0.7099999785423279,"id":"https://metadata.un.org/sdg/16","display_name":"Peace, Justice and strong institutions"}],"awards":[{"id":"https://openalex.org/G2493454646","display_name":null,"funder_award_id":"2018YFA0701603","funder_id":"https://openalex.org/F4320335777","funder_display_name":"National Key Research and Development Program of China"},{"id":"https://openalex.org/G7385621627","display_name":null,"funder_award_id":"2008085MF213","funder_id":"https://openalex.org/F4320334897","funder_display_name":"Natural Science Foundation of Anhui Province"}],"funders":[{"id":"https://openalex.org/F4320334897","display_name":"Natural Science Foundation of Anhui Province","ror":null},{"id":"https://openalex.org/F4320335777","display_name":"National Key Research and Development Program of China","ror":null}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":72,"referenced_works":["https://openalex.org/W32403112","https://openalex.org/W41554520","https://openalex.org/W644076287","https://openalex.org/W1977655452","https://openalex.org/W2047057213","https://openalex.org/W2057055688","https://openalex.org/W2096698697","https://openalex.org/W2100983013","https://openalex.org/W2107741520","https://openalex.org/W2111873046","https://openalex.org/W2112707476","https://openalex.org/W2119051448","https://openalex.org/W2121863487","https://openalex.org/W2129670787","https://openalex.org/W2145339207","https://openalex.org/W2150468603","https://openalex.org/W2155968351","https://openalex.org/W2168814611","https://openalex.org/W2173564293","https://openalex.org/W2201581102","https://openalex.org/W2257979135","https://openalex.org/W2436711315","https://openalex.org/W2553109721","https://openalex.org/W2583993537","https://openalex.org/W2594466397","https://openalex.org/W2733312032","https://openalex.org/W2746553466","https://openalex.org/W2761873684","https://openalex.org/W2766447205","https://openalex.org/W2785998395","https://openalex.org/W2787841449","https://openalex.org/W2792919371","https://openalex.org/W2890148520","https://openalex.org/W2895453875","https://openalex.org/W2907051988","https://openalex.org/W2927014686","https://openalex.org/W2938321354","https://openalex.org/W2946469828","https://openalex.org/W2949475445","https://openalex.org/W2951799221","https://openalex.org/W2962831590","https://openalex.org/W2963477884","https://openalex.org/W2963674921","https://openalex.org/W2964082094","https://openalex.org/W2964291307","https://openalex.org/W2971204130","https://openalex.org/W2979473749","https://openalex.org/W2998069546","https://openalex.org/W3100944043","https://openalex.org/W3103780890","https://openalex.org/W3106462682","https://openalex.org/W4230890479","https://openalex.org/W4289376774","https://openalex.org/W4297732320","https://openalex.org/W6676320248","https://openalex.org/W6677145610","https://openalex.org/W6685444567","https://openalex.org/W6687681856","https://openalex.org/W6715102896","https://openalex.org/W6718359804","https://openalex.org/W6730013898","https://openalex.org/W6730641667","https://openalex.org/W6734330393","https://openalex.org/W6744838376","https://openalex.org/W6747682777","https://openalex.org/W6748714527","https://openalex.org/W6754857033","https://openalex.org/W6754957883","https://openalex.org/W6761032199","https://openalex.org/W6769341872","https://openalex.org/W6772496516","https://openalex.org/W6780559895"],"related_works":["https://openalex.org/W2961085424","https://openalex.org/W4306674287","https://openalex.org/W3046775127","https://openalex.org/W3107602296","https://openalex.org/W3170094116","https://openalex.org/W4386462264","https://openalex.org/W4364306694","https://openalex.org/W4312192474","https://openalex.org/W4283697347","https://openalex.org/W4210805261"],"abstract_inverted_index":{"Reinforcement":[0,5],"Learning":[1,6],"(RL),":[2],"especially":[3],"Deep":[4,126],"(DRL),":[7],"has":[8],"made":[9],"great":[10],"progress":[11],"in":[12,41,82,176],"many":[13],"areas,":[14],"such":[15],"as":[16],"robots,":[17],"video":[18],"games":[19],"and":[20,109,122,161,209,213],"driving.":[21],"However,":[22,66],"sample":[23,174,233],"inefficiency":[24],"is":[25,138,154,189,205],"a":[26,83,96,104,148,193,224],"big":[27],"obstacle":[28],"to":[29,90,156,191,231],"the":[30,38,159,163,169,173,199,214],"widespread":[31],"practical":[32],"application":[33],"of":[34,86,117,171],"DRL.":[35,87],"Inspired":[36],"by":[37,49,235],"decision":[39],"making":[40],"human":[42],"brain,":[43],"this":[44,93],"problem":[45],"can":[46],"be":[47],"solved":[48],"incorporating":[50],"instance":[51],"based":[52,60],"learning,":[53],"i.e.":[54],"episodic":[55,58,76,80,106,111,151,216,237],"memory.":[56],"Many":[57],"memory":[59,81,107,152,188,217,238],"RL":[61,229],"algorithms":[62,68,230],"have":[63],"emerged":[64],"recently.":[65],"these":[67],"either":[69],"only":[70],"replace":[71],"parametric":[72],"DRL":[73],"algorithm":[74,129,137,204],"with":[75,134],"control":[77],"or":[78],"incorporate":[79],"single":[84],"component":[85],"In":[88,146],"contrast":[89],"preview":[91],"works,":[92],"paper":[94],"proposes":[95],"new":[97,105,149,225],"sample-efficient":[98,210],"reinforcement":[99],"learning":[100,175],"architecture":[101],"which":[102],"introduces":[103],"module":[108,153],"incorporates":[110],"thought":[112],"into":[113],"some":[114],"key":[115],"components":[116],"DRL:":[118],"exploration,":[119],"experience":[120,177],"replay":[121,187],"loss":[123,160],"function.":[124],"Taking":[125],"Q-Network":[127],"(DQN)":[128],"for":[130,166,227],"example,":[131],"when":[132],"combined":[133],"DQN,":[135],"our":[136,203],"called":[139,183],"High":[140],"Efficient":[141],"Episodic":[142],"Memory":[143],"DQN":[144,212],"(HE-EMDQN).":[145],"HE-EMDQN,":[147],"non-parametric":[150],"introduced":[155],"help":[157],"calculate":[158],"modify":[162],"predicted":[164],"value":[165],"exploration.":[167],"For":[168],"sake":[170],"accelerating":[172],"replay,":[178],"an":[179],"auxiliary":[180],"small":[181],"buffer":[182],"percentile":[184],"best":[185],"episode":[186],"designed":[190],"compose":[192],"mixed":[194],"mini-batch.":[195],"We":[196],"show":[197],"across":[198],"testing":[200],"environments":[201],"that":[202],"significantly":[206],"more":[207],"powerful":[208],"than":[211],"recent":[215],"deep":[218],"q-network":[219],"(EMDQN).":[220],"This":[221],"work":[222],"provides":[223],"perspective":[226],"other":[228],"improve":[232],"efficiency":[234],"utilising":[236],"efficiently.":[239]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":5},{"year":2024,"cited_by_count":18},{"year":2023,"cited_by_count":4},{"year":2022,"cited_by_count":3},{"year":2021,"cited_by_count":4},{"year":2020,"cited_by_count":1}],"updated_date":"2026-03-27T05:58:40.876381","created_date":"2025-10-10T00:00:00"}
