{"id":"https://openalex.org/W4409659344","doi":"https://doi.org/10.1109/tnnls.2025.3557930","title":"A Two-Stage Selective Experience Replay for Double-Actor Deep Reinforcement Learning","display_name":"A Two-Stage Selective Experience Replay for Double-Actor Deep Reinforcement Learning","publication_year":2025,"publication_date":"2025-04-22","ids":{"openalex":"https://openalex.org/W4409659344","doi":"https://doi.org/10.1109/tnnls.2025.3557930","pmid":"https://pubmed.ncbi.nlm.nih.gov/40261777"},"language":"en","primary_location":{"id":"doi:10.1109/tnnls.2025.3557930","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tnnls.2025.3557930","pdf_url":null,"source":{"id":"https://openalex.org/S4210175523","display_name":"IEEE Transactions on Neural Networks and Learning Systems","issn_l":"2162-237X","issn":["2162-237X","2162-2388"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Neural Networks and Learning Systems","raw_type":"journal-article"},"type":"article","indexed_in":["crossref","pubmed"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5100771701","display_name":"Meng Xu","orcid":"https://orcid.org/0000-0003-4857-5439"},"institutions":[{"id":"https://openalex.org/I168719708","display_name":"City University of Hong Kong","ror":"https://ror.org/03q8dnn23","country_code":"HK","type":"education","lineage":["https://openalex.org/I168719708"]}],"countries":["HK"],"is_corresponding":false,"raw_author_name":"Meng Xu","raw_affiliation_strings":["Department of Computer Science, City University of Hong Kong, Hong Kong, SAR, China"],"raw_orcid":"https://orcid.org/0000-0003-4857-5439","affiliations":[{"raw_affiliation_string":"Department of Computer Science, City University of Hong Kong, Hong Kong, SAR, China","institution_ids":["https://openalex.org/I168719708"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5071933192","display_name":"Xinhong Chen","orcid":"https://orcid.org/0000-0002-8563-148X"},"institutions":[{"id":"https://openalex.org/I168719708","display_name":"City University of Hong Kong","ror":"https://ror.org/03q8dnn23","country_code":"HK","type":"education","lineage":["https://openalex.org/I168719708"]}],"countries":["HK"],"is_corresponding":false,"raw_author_name":"Xinhong Chen","raw_affiliation_strings":["Department of Computer Science, City University of Hong Kong, Hong Kong, SAR, China"],"raw_orcid":"https://orcid.org/0000-0002-8563-148X","affiliations":[{"raw_affiliation_string":"Department of Computer Science, City University of Hong Kong, Hong Kong, SAR, China","institution_ids":["https://openalex.org/I168719708"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101700401","display_name":"Zihao Wen","orcid":"https://orcid.org/0000-0002-5401-5393"},"institutions":[{"id":"https://openalex.org/I168719708","display_name":"City University of Hong Kong","ror":"https://ror.org/03q8dnn23","country_code":"HK","type":"education","lineage":["https://openalex.org/I168719708"]}],"countries":["HK"],"is_corresponding":false,"raw_author_name":"Zihao Wen","raw_affiliation_strings":["Department of Computer Science, City University of Hong Kong, Hong Kong, SAR, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Department of Computer Science, City University of Hong Kong, Hong Kong, SAR, China","institution_ids":["https://openalex.org/I168719708"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5111335760","display_name":"Weiwei Fu","orcid":null},"institutions":[{"id":"https://openalex.org/I168719708","display_name":"City University of Hong Kong","ror":"https://ror.org/03q8dnn23","country_code":"HK","type":"education","lineage":["https://openalex.org/I168719708"]}],"countries":["HK"],"is_corresponding":false,"raw_author_name":"Weiwei Fu","raw_affiliation_strings":["Department of Computer Science, City University of Hong Kong, Hong Kong, SAR, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Department of Computer Science, City University of Hong Kong, Hong Kong, SAR, China","institution_ids":["https://openalex.org/I168719708"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5100356291","display_name":"Jianping Wang","orcid":"https://orcid.org/0000-0002-9318-1482"},"institutions":[{"id":"https://openalex.org/I168719708","display_name":"City University of Hong Kong","ror":"https://ror.org/03q8dnn23","country_code":"HK","type":"education","lineage":["https://openalex.org/I168719708"]}],"countries":["HK"],"is_corresponding":false,"raw_author_name":"Jianping Wang","raw_affiliation_strings":["Department of Computer Science, City University of Hong Kong, Hong Kong, SAR, China"],"raw_orcid":"https://orcid.org/0000-0002-9318-1482","affiliations":[{"raw_affiliation_string":"Department of Computer Science, City University of Hong Kong, Hong Kong, SAR, China","institution_ids":["https://openalex.org/I168719708"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":4.2679,"has_fulltext":false,"cited_by_count":4,"citation_normalized_percentile":{"value":0.93779468,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":91,"max":99},"biblio":{"volume":"36","issue":"9","first_page":"16864","last_page":"16878"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10581","display_name":"Neural dynamics and brain function","score":0.9713000059127808,"subfield":{"id":"https://openalex.org/subfields/2805","display_name":"Cognitive Neuroscience"},"field":{"id":"https://openalex.org/fields/28","display_name":"Neuroscience"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},"topics":[{"id":"https://openalex.org/T10581","display_name":"Neural dynamics and brain function","score":0.9713000059127808,"subfield":{"id":"https://openalex.org/subfields/2805","display_name":"Cognitive Neuroscience"},"field":{"id":"https://openalex.org/fields/28","display_name":"Neuroscience"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},{"id":"https://openalex.org/T12611","display_name":"Neural Networks and Reservoir Computing","score":0.9593999981880188,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10502","display_name":"Advanced Memory and Neural Computing","score":0.9226999878883362,"subfield":{"id":"https://openalex.org/subfields/2208","display_name":"Electrical and Electronic Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/stage","display_name":"Stage (stratigraphy)","score":0.6040406227111816},{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.6014982461929321},{"id":"https://openalex.org/keywords/psychology","display_name":"Psychology","score":0.4336366653442383},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.40718328952789307},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.31070011854171753},{"id":"https://openalex.org/keywords/biology","display_name":"Biology","score":0.0907755196094513},{"id":"https://openalex.org/keywords/paleontology","display_name":"Paleontology","score":0.06313350796699524}],"concepts":[{"id":"https://openalex.org/C146357865","wikidata":"https://www.wikidata.org/wiki/Q1123245","display_name":"Stage (stratigraphy)","level":2,"score":0.6040406227111816},{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.6014982461929321},{"id":"https://openalex.org/C15744967","wikidata":"https://www.wikidata.org/wiki/Q9418","display_name":"Psychology","level":0,"score":0.4336366653442383},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.40718328952789307},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.31070011854171753},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0907755196094513},{"id":"https://openalex.org/C151730666","wikidata":"https://www.wikidata.org/wiki/Q7205","display_name":"Paleontology","level":1,"score":0.06313350796699524}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1109/tnnls.2025.3557930","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tnnls.2025.3557930","pdf_url":null,"source":{"id":"https://openalex.org/S4210175523","display_name":"IEEE Transactions on Neural Networks and Learning Systems","issn_l":"2162-237X","issn":["2162-237X","2162-2388"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Neural Networks and Learning Systems","raw_type":"journal-article"},{"id":"pmid:40261777","is_oa":false,"landing_page_url":"https://pubmed.ncbi.nlm.nih.gov/40261777","pdf_url":null,"source":{"id":"https://openalex.org/S4306525036","display_name":"PubMed","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1299303238","host_organization_name":"National Institutes of Health","host_organization_lineage":["https://openalex.org/I1299303238"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE transactions on neural networks and learning systems","raw_type":null}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":37,"referenced_works":["https://openalex.org/W2964048876","https://openalex.org/W3004477615","https://openalex.org/W3127561923","https://openalex.org/W3152878473","https://openalex.org/W3170872007","https://openalex.org/W3189480080","https://openalex.org/W3199711351","https://openalex.org/W3210258147","https://openalex.org/W4280508537","https://openalex.org/W4283582213","https://openalex.org/W4291393104","https://openalex.org/W4308211465","https://openalex.org/W4312459443","https://openalex.org/W4313059722","https://openalex.org/W4313362050","https://openalex.org/W4323662139","https://openalex.org/W4384789633","https://openalex.org/W4385286429","https://openalex.org/W4385737557","https://openalex.org/W4386024530","https://openalex.org/W4387789518","https://openalex.org/W4387903897","https://openalex.org/W4388783832","https://openalex.org/W4391559291","https://openalex.org/W4391697075","https://openalex.org/W4391912934","https://openalex.org/W4391974559","https://openalex.org/W4396506409","https://openalex.org/W4396605204","https://openalex.org/W4396754757","https://openalex.org/W4397032818","https://openalex.org/W4399541205","https://openalex.org/W4399665988","https://openalex.org/W4401452744","https://openalex.org/W4402435476","https://openalex.org/W4402450920","https://openalex.org/W4405335163"],"related_works":["https://openalex.org/W4391375266","https://openalex.org/W2899084033","https://openalex.org/W2748952813","https://openalex.org/W2390279801","https://openalex.org/W4306904969","https://openalex.org/W4391913857","https://openalex.org/W2358668433","https://openalex.org/W4396701345","https://openalex.org/W2138720691","https://openalex.org/W2376932109"],"abstract_inverted_index":{"Deep":[0],"reinforcement":[1],"learning":[2,132],"(DRL)":[3],"has":[4,28],"been":[5],"widely":[6],"applied":[7],"to":[8,52,61,97,119,155,188,214,218],"various":[9],"applications,":[10],"but":[11],"improving":[12],"the":[13,16,25,49,54,58,72,83,92,121,138,151,165,168,184,190,198,208,215,228,232,235,246],"exploration":[14,39],"and":[15,40,71,95,181,197,234,255],"accuracy":[17],"of":[18,124,140,148,159,248],"Q-value":[19,41],"estimation":[20],"remain":[21],"key":[22],"challenges.":[23],"Recently,":[24],"double-actor":[26,44,116,141,252],"architecture":[27],"emerged":[29],"as":[30,179],"a":[31,77,106,157,175],"promising":[32],"DRL":[33,45,117,142,253,260],"framework":[34],"that":[35,109,242],"can":[36,89,110,225],"enhance":[37],"both":[38],"estimation.":[42],"Existing":[43],"methods":[46,118,143,254,261],"sample":[47,195],"from":[48,207],"replay":[50,169],"buffer":[51,170],"update":[53,62,94,219],"two":[55,145],"actors;":[56],"however,":[57],"samples":[59,166,233],"used":[60],"each":[63,147,194],"actor":[64,85,129,199,217,237],"are":[65,204],"generated":[66],"by":[67],"its":[68],"previous":[69],"versions":[70],"other":[73],"actor,":[74],"resulting":[75],"in":[76,167],"different":[78],"data":[79,125],"distribution":[80,126,212,229],"compared":[81],"with":[82,210],"current":[84,216,236],"being":[86,201,238],"updated,":[87],"which":[88,149],"negatively":[90],"impact":[91],"actor's":[93],"lead":[96],"suboptimal":[98],"policies.":[99,134],"To":[100],"this":[101,103,222],"end,":[102],"work":[104],"proposes":[105],"generic":[107],"solution":[108],"be":[111],"seamlessly":[112],"integrated":[113],"into":[114,144,171],"existing":[115],"mitigate":[120,227],"adverse":[122],"effects":[123],"differences":[127,192,213],"on":[128],"updates,":[130],"thereby":[131],"better":[133],"Specifically,":[135],"we":[136,224],"decompose":[137],"updates":[139],"stages,":[146],"uses":[150],"same":[152],"sampling":[153,162],"approach":[154,163],"train":[156],"pair":[158],"actor-critic.":[160],"This":[161],"classifies":[164],"distinct":[172],"categories":[173,209],"using":[174],"clustering":[176],"technique,":[177],"such":[178],"K-means,":[180],"subsequently":[182],"employs":[183],"Jensen-Shannon":[185],"(JS)":[186],"divergence":[187],"evaluate":[189],"distributional":[191],"between":[193,231],"category":[196],"currently":[200],"updated.":[202,239],"Samples":[203],"then":[205],"prioritized":[206],"smaller":[211],"it.":[220],"In":[221],"way,":[223],"effectively":[226],"difference":[230],"Experiments":[240],"demonstrate":[241],"our":[243],"method":[244],"enhances":[245],"performance":[247],"five":[249],"state-of-the-art":[250],"(SOTA)":[251],"outperforms":[256],"eight":[257,263],"SOTA":[258],"single-actor":[259],"across":[262],"tasks.":[264]},"counts_by_year":[{"year":2026,"cited_by_count":3},{"year":2025,"cited_by_count":1}],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2025-10-10T00:00:00"}
