{"id":"https://openalex.org/W3171174687","doi":"https://doi.org/10.1109/access.2021.3085142","title":"Enhanced Off-Policy Reinforcement Learning With Focused Experience Replay","display_name":"Enhanced Off-Policy Reinforcement Learning With Focused Experience Replay","publication_year":2021,"publication_date":"2021-01-01","ids":{"openalex":"https://openalex.org/W3171174687","doi":"https://doi.org/10.1109/access.2021.3085142","mag":"3171174687"},"language":"en","primary_location":{"id":"doi:10.1109/access.2021.3085142","is_oa":true,"landing_page_url":"https://doi.org/10.1109/access.2021.3085142","pdf_url":"https://ieeexplore.ieee.org/ielx7/6287639/9312710/09444458.pdf","source":{"id":"https://openalex.org/S2485537415","display_name":"IEEE Access","issn_l":"2169-3536","issn":["2169-3536"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Access","raw_type":"journal-article"},"type":"article","indexed_in":["crossref","doaj"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://ieeexplore.ieee.org/ielx7/6287639/9312710/09444458.pdf","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5073091471","display_name":"Seung-Hyun Kong","orcid":"https://orcid.org/0000-0002-4753-1998"},"institutions":[{"id":"https://openalex.org/I157485424","display_name":"Korea Advanced Institute of Science and Technology","ror":"https://ror.org/05apxxy63","country_code":"KR","type":"education","lineage":["https://openalex.org/I157485424"]}],"countries":["KR"],"is_corresponding":true,"raw_author_name":"Seung-Hyun Kong","raw_affiliation_strings":["The Cho Chun Shik Graduate School of Green Transportation, Korea Advanced Institute of Science and Technology (KAIST), Daejeon, South Korea","The Robotics Program, Korea Advanced Institute of Science and Technology (KAIST), Daejeon, South Korea"],"affiliations":[{"raw_affiliation_string":"The Cho Chun Shik Graduate School of Green Transportation, Korea Advanced Institute of Science and Technology (KAIST), Daejeon, South Korea","institution_ids":["https://openalex.org/I157485424"]},{"raw_affiliation_string":"The Robotics Program, Korea Advanced Institute of Science and Technology (KAIST), Daejeon, South Korea","institution_ids":["https://openalex.org/I157485424"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5031946207","display_name":"I Made Aswin Nahrendra","orcid":"https://orcid.org/0000-0001-9515-7059"},"institutions":[{"id":"https://openalex.org/I157485424","display_name":"Korea Advanced Institute of Science and Technology","ror":"https://ror.org/05apxxy63","country_code":"KR","type":"education","lineage":["https://openalex.org/I157485424"]}],"countries":["KR"],"is_corresponding":false,"raw_author_name":"I. Made Aswin Nahrendra","raw_affiliation_strings":["The Robotics Program, Korea Advanced Institute of Science and Technology (KAIST), Daejeon, South Korea"],"affiliations":[{"raw_affiliation_string":"The Robotics Program, Korea Advanced Institute of Science and Technology (KAIST), Daejeon, South Korea","institution_ids":["https://openalex.org/I157485424"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5007043388","display_name":"Dong-Hee Paek","orcid":"https://orcid.org/0000-0003-0008-3726"},"institutions":[{"id":"https://openalex.org/I157485424","display_name":"Korea Advanced Institute of Science and Technology","ror":"https://ror.org/05apxxy63","country_code":"KR","type":"education","lineage":["https://openalex.org/I157485424"]}],"countries":["KR"],"is_corresponding":false,"raw_author_name":"Dong-Hee Paek","raw_affiliation_strings":["The Cho Chun Shik Graduate School of Green Transportation, Korea Advanced Institute of Science and Technology (KAIST), Daejeon, South Korea"],"affiliations":[{"raw_affiliation_string":"The Cho Chun Shik Graduate School of Green Transportation, Korea Advanced Institute of Science and Technology (KAIST), Daejeon, South Korea","institution_ids":["https://openalex.org/I157485424"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5073091471"],"corresponding_institution_ids":["https://openalex.org/I157485424"],"apc_list":{"value":1850,"currency":"USD","value_usd":1850},"apc_paid":{"value":1850,"currency":"USD","value_usd":1850},"fwci":1.1188,"has_fulltext":true,"cited_by_count":10,"citation_normalized_percentile":{"value":0.8202343,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":91,"max":99},"biblio":{"volume":"9","issue":null,"first_page":"93152","last_page":"93164"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9986000061035156,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9986000061035156,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10603","display_name":"Smart Grid Energy Management","score":0.9824000000953674,"subfield":{"id":"https://openalex.org/subfields/2208","display_name":"Electrical and Electronic Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12101","display_name":"Advanced Bandit Algorithms Research","score":0.9707000255584717,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.8490054607391357},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7693458795547485},{"id":"https://openalex.org/keywords/benchmark","display_name":"Benchmark (surveying)","score":0.712764322757721},{"id":"https://openalex.org/keywords/sampling","display_name":"Sampling (signal processing)","score":0.7010958790779114},{"id":"https://openalex.org/keywords/inefficiency","display_name":"Inefficiency","score":0.5770161747932434},{"id":"https://openalex.org/keywords/tuple","display_name":"Tuple","score":0.5564461946487427},{"id":"https://openalex.org/keywords/stability","display_name":"Stability (learning theory)","score":0.5098320841789246},{"id":"https://openalex.org/keywords/experience-sampling-method","display_name":"Experience sampling method","score":0.47980526089668274},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.45422297716140747},{"id":"https://openalex.org/keywords/importance-sampling","display_name":"Importance sampling","score":0.42481905221939087},{"id":"https://openalex.org/keywords/variance","display_name":"Variance (accounting)","score":0.421505331993103},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.41531217098236084},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.35455965995788574},{"id":"https://openalex.org/keywords/statistics","display_name":"Statistics","score":0.21618875861167908},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.16073939204216003},{"id":"https://openalex.org/keywords/telecommunications","display_name":"Telecommunications","score":0.08821207284927368}],"concepts":[{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.8490054607391357},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7693458795547485},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.712764322757721},{"id":"https://openalex.org/C140779682","wikidata":"https://www.wikidata.org/wiki/Q210868","display_name":"Sampling (signal processing)","level":3,"score":0.7010958790779114},{"id":"https://openalex.org/C2778869765","wikidata":"https://www.wikidata.org/wiki/Q6028363","display_name":"Inefficiency","level":2,"score":0.5770161747932434},{"id":"https://openalex.org/C118930307","wikidata":"https://www.wikidata.org/wiki/Q600590","display_name":"Tuple","level":2,"score":0.5564461946487427},{"id":"https://openalex.org/C112972136","wikidata":"https://www.wikidata.org/wiki/Q7595718","display_name":"Stability (learning theory)","level":2,"score":0.5098320841789246},{"id":"https://openalex.org/C65499552","wikidata":"https://www.wikidata.org/wiki/Q5421061","display_name":"Experience sampling method","level":2,"score":0.47980526089668274},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.45422297716140747},{"id":"https://openalex.org/C52740198","wikidata":"https://www.wikidata.org/wiki/Q1539564","display_name":"Importance sampling","level":3,"score":0.42481905221939087},{"id":"https://openalex.org/C196083921","wikidata":"https://www.wikidata.org/wiki/Q7915758","display_name":"Variance (accounting)","level":2,"score":0.421505331993103},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.41531217098236084},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.35455965995788574},{"id":"https://openalex.org/C105795698","wikidata":"https://www.wikidata.org/wiki/Q12483","display_name":"Statistics","level":1,"score":0.21618875861167908},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.16073939204216003},{"id":"https://openalex.org/C76155785","wikidata":"https://www.wikidata.org/wiki/Q418","display_name":"Telecommunications","level":1,"score":0.08821207284927368},{"id":"https://openalex.org/C121955636","wikidata":"https://www.wikidata.org/wiki/Q4116214","display_name":"Accounting","level":1,"score":0.0},{"id":"https://openalex.org/C175444787","wikidata":"https://www.wikidata.org/wiki/Q39072","display_name":"Microeconomics","level":1,"score":0.0},{"id":"https://openalex.org/C13280743","wikidata":"https://www.wikidata.org/wiki/Q131089","display_name":"Geodesy","level":1,"score":0.0},{"id":"https://openalex.org/C118615104","wikidata":"https://www.wikidata.org/wiki/Q121416","display_name":"Discrete mathematics","level":1,"score":0.0},{"id":"https://openalex.org/C144133560","wikidata":"https://www.wikidata.org/wiki/Q4830453","display_name":"Business","level":0,"score":0.0},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0},{"id":"https://openalex.org/C94915269","wikidata":"https://www.wikidata.org/wiki/Q1834857","display_name":"Detector","level":2,"score":0.0},{"id":"https://openalex.org/C15744967","wikidata":"https://www.wikidata.org/wiki/Q9418","display_name":"Psychology","level":0,"score":0.0},{"id":"https://openalex.org/C205649164","wikidata":"https://www.wikidata.org/wiki/Q1071","display_name":"Geography","level":0,"score":0.0},{"id":"https://openalex.org/C19499675","wikidata":"https://www.wikidata.org/wiki/Q232207","display_name":"Monte Carlo method","level":2,"score":0.0},{"id":"https://openalex.org/C77805123","wikidata":"https://www.wikidata.org/wiki/Q161272","display_name":"Social psychology","level":1,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1109/access.2021.3085142","is_oa":true,"landing_page_url":"https://doi.org/10.1109/access.2021.3085142","pdf_url":"https://ieeexplore.ieee.org/ielx7/6287639/9312710/09444458.pdf","source":{"id":"https://openalex.org/S2485537415","display_name":"IEEE Access","issn_l":"2169-3536","issn":["2169-3536"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Access","raw_type":"journal-article"},{"id":"pmh:oai:doaj.org/article:79a39ba6debb44519b40eb48b79a0470","is_oa":true,"landing_page_url":"https://doaj.org/article/79a39ba6debb44519b40eb48b79a0470","pdf_url":null,"source":{"id":"https://openalex.org/S112646816","display_name":"SHILAP Revista de lepidopterolog\u00eda","issn_l":"0300-5267","issn":["0300-5267","2340-4078"],"is_oa":true,"is_in_doaj":true,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"journal"},"license":"cc-by-sa","license_id":"https://openalex.org/licenses/cc-by-sa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"IEEE Access, Vol 9, Pp 93152-93164 (2021)","raw_type":"article"}],"best_oa_location":{"id":"doi:10.1109/access.2021.3085142","is_oa":true,"landing_page_url":"https://doi.org/10.1109/access.2021.3085142","pdf_url":"https://ieeexplore.ieee.org/ielx7/6287639/9312710/09444458.pdf","source":{"id":"https://openalex.org/S2485537415","display_name":"IEEE Access","issn_l":"2169-3536","issn":["2169-3536"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Access","raw_type":"journal-article"},"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G575676360","display_name":null,"funder_award_id":"2020-0-00440","funder_id":"https://openalex.org/F4320335489","funder_display_name":"Institute for Information and Communications Technology Promotion"},{"id":"https://openalex.org/G581076360","display_name":null,"funder_award_id":"2020-0-00440","funder_id":"https://openalex.org/F4320324891","funder_display_name":"Iran Telecommunication Research Center"},{"id":"https://openalex.org/G6072120315","display_name":null,"funder_award_id":"funded","funder_id":"https://openalex.org/F4320335489","funder_display_name":"Institute for Information and Communications Technology Promotion"},{"id":"https://openalex.org/G6785251721","display_name":null,"funder_award_id":"2020-0-00440","funder_id":"https://openalex.org/F4320328359","funder_display_name":"Ministry of Science and ICT, South Korea"},{"id":"https://openalex.org/G7685055460","display_name":null,"funder_award_id":"Grant","funder_id":"https://openalex.org/F4320328359","funder_display_name":"Ministry of Science and ICT, South Korea"}],"funders":[{"id":"https://openalex.org/F4320324891","display_name":"Iran Telecommunication Research Center","ror":"https://ror.org/01a3g2z22"},{"id":"https://openalex.org/F4320328359","display_name":"Ministry of Science and ICT, South Korea","ror":"https://ror.org/01wpjm123"},{"id":"https://openalex.org/F4320335489","display_name":"Institute for Information and Communications Technology Promotion","ror":"https://ror.org/01g0hqq23"}],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W3171174687.pdf","grobid_xml":"https://content.openalex.org/works/W3171174687.grobid-xml"},"referenced_works_count":67,"referenced_works":["https://openalex.org/W1522301498","https://openalex.org/W1757796397","https://openalex.org/W2094987262","https://openalex.org/W2107741520","https://openalex.org/W2118978333","https://openalex.org/W2141559645","https://openalex.org/W2145339207","https://openalex.org/W2155968351","https://openalex.org/W2158782408","https://openalex.org/W2165150801","https://openalex.org/W2201581102","https://openalex.org/W2257979135","https://openalex.org/W2287077337","https://openalex.org/W2338318698","https://openalex.org/W2342662072","https://openalex.org/W2560647685","https://openalex.org/W2746553466","https://openalex.org/W2761873684","https://openalex.org/W2772721022","https://openalex.org/W2775482448","https://openalex.org/W2781726626","https://openalex.org/W2787800669","https://openalex.org/W2787938642","https://openalex.org/W2902456977","https://openalex.org/W2904246096","https://openalex.org/W2907953482","https://openalex.org/W2942608247","https://openalex.org/W2954882791","https://openalex.org/W2962839548","https://openalex.org/W2962867954","https://openalex.org/W2963296584","https://openalex.org/W2963477884","https://openalex.org/W2963641140","https://openalex.org/W2963864421","https://openalex.org/W2963923407","https://openalex.org/W2964001908","https://openalex.org/W2964291307","https://openalex.org/W2965870268","https://openalex.org/W2970586779","https://openalex.org/W3046082509","https://openalex.org/W3128395826","https://openalex.org/W4211171425","https://openalex.org/W4288363736","https://openalex.org/W4295719664","https://openalex.org/W4298857966","https://openalex.org/W4300799055","https://openalex.org/W4302570325","https://openalex.org/W6631190155","https://openalex.org/W6637967152","https://openalex.org/W6676320248","https://openalex.org/W6684205842","https://openalex.org/W6684921986","https://openalex.org/W6687681856","https://openalex.org/W6704571135","https://openalex.org/W6740801417","https://openalex.org/W6744838376","https://openalex.org/W6745935785","https://openalex.org/W6746809867","https://openalex.org/W6747473740","https://openalex.org/W6748554570","https://openalex.org/W6748594472","https://openalex.org/W6748839928","https://openalex.org/W6756754374","https://openalex.org/W6757592117","https://openalex.org/W6758223966","https://openalex.org/W6765121640","https://openalex.org/W6780559895"],"related_works":["https://openalex.org/W2264067234","https://openalex.org/W3124243301","https://openalex.org/W1571502335","https://openalex.org/W1589409554","https://openalex.org/W2759038785","https://openalex.org/W2172232600","https://openalex.org/W3123876860","https://openalex.org/W3124172198","https://openalex.org/W2046181650","https://openalex.org/W2142633247"],"abstract_inverted_index":{"Utilizing":[0],"the":[1,6,11,16,19,27,31,35,56,64,72,76,81,85,98,108,135,149,153,159,167,179,184,206,219,228,233,236,245],"collected":[2],"experience":[3,32,41,69,82,91],"tuples":[4,33,70,83],"in":[5,18,34,55,84,97,107,134],"replay":[7],"buffer":[8],"(RB)":[9],"is":[10,47,100],"primary":[12],"way":[13],"of":[14,68,80,111,152,235,244,252],"exploiting":[15],"experiences":[17,94,133],"off-policy":[20,57,186],"reinforcement":[21],"learning":[22,160,211],"(RL)":[23],"algorithms,":[24,239],"and,":[25],"therefore,":[26],"sampling":[28,53,67,79,122,129,155,181,230],"scheme":[29,54],"for":[30,40,191],"RB":[36,86,99],"can":[37],"be":[38],"critical":[39],"utilization.":[42],"In":[43,74,137],"this":[44,115],"paper,":[45],"it":[46],"found":[48],"that":[49,125,147],"a":[50,88,120,127,204],"widely":[51],"used":[52],"RL":[58,187,193,238],"suffers":[59],"from":[60,71],"inefficiency":[61],"due":[62],"to":[63,131,157,165,183],"inadequate":[65],"uneven":[66],"RB.":[73,136],"fact,":[75],"conventional":[77],"uniform":[78],"causes":[87],"severely":[89],"unbalanced":[90],"utilization,":[92],"since":[93],"stored":[95],"earlier":[96],"sampled":[101],"with":[102,221,241],"much":[103],"higher":[104,128],"frequency":[105],"especially":[106,217],"early":[109],"stage":[110],"learning.":[112],"We":[113],"mitigate":[114,166],"fundamental":[116],"problem":[117],"by":[118],"employing":[119],"half-normal":[121,154],"probability":[123,130],"window":[124,156],"allocates":[126],"newer":[132],"addition,":[138],"we":[139,177],"propose":[140],"general":[141],"and":[142,162,164,189,200,213,224],"local":[143],"size":[144],"adjustment":[145],"schemes":[146],"determine":[148],"standard":[150],"deviation":[151],"enhance":[158],"speed":[161,212],"performance":[163,169,175,215,246],"temporary":[168],"degradation":[170],"during":[171],"training,":[172],"respectively.":[173],"For":[174],"demonstration,":[176],"apply":[178],"proposed":[180,207,229],"technique":[182,208,231],"state-of-the-art":[185],"algorithms":[188],"test":[190],"various":[192],"benchmark":[194],"tasks":[195,220],"such":[196],"as":[197],"MuJoCo":[198],"gym":[199],"CARLA":[201],"simulator.":[202],"As":[203],"result,":[205],"shows":[209],"considerable":[210],"final":[214],"improvement,":[216],"on":[218],"large":[222],"state":[223],"action":[225],"space.":[226],"Furthermore,":[227],"increases":[232],"stability":[234],"considered":[237],"verified":[240],"less":[242],"variance":[243],"results":[247],"across":[248],"different":[249],"random":[250],"seeds":[251],"network":[253],"initialization.":[254]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":1},{"year":2024,"cited_by_count":4},{"year":2023,"cited_by_count":2},{"year":2022,"cited_by_count":2}],"updated_date":"2026-03-16T09:10:04.655348","created_date":"2025-10-10T00:00:00"}
