{"id":"https://openalex.org/W4281258657","doi":"https://doi.org/10.1109/tnnls.2022.3174051","title":"Improved Soft Actor-Critic: Mixing Prioritized Off-Policy Samples With On-Policy Experiences","display_name":"Improved Soft Actor-Critic: Mixing Prioritized Off-Policy Samples With On-Policy Experiences","publication_year":2022,"publication_date":"2022-05-19","ids":{"openalex":"https://openalex.org/W4281258657","doi":"https://doi.org/10.1109/tnnls.2022.3174051","pmid":"https://pubmed.ncbi.nlm.nih.gov/35588412"},"language":"en","primary_location":{"id":"doi:10.1109/tnnls.2022.3174051","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tnnls.2022.3174051","pdf_url":null,"source":{"id":"https://openalex.org/S4210175523","display_name":"IEEE Transactions on Neural Networks and Learning Systems","issn_l":"2162-237X","issn":["2162-237X","2162-2388"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Neural Networks and Learning Systems","raw_type":"journal-article"},"type":"article","indexed_in":["crossref","pubmed"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5043976016","display_name":"Chayan Banerjee","orcid":"https://orcid.org/0000-0003-1039-3744"},"institutions":[{"id":"https://openalex.org/I78757542","display_name":"University of Newcastle Australia","ror":"https://ror.org/00eae9z71","country_code":"AU","type":"education","lineage":["https://openalex.org/I78757542"]}],"countries":["AU"],"is_corresponding":false,"raw_author_name":"Chayan Banerjee","raw_affiliation_strings":["School of Engineering, The University of Newcastle, Callaghan, NSW, Australia"],"raw_orcid":"https://orcid.org/0000-0003-1039-3744","affiliations":[{"raw_affiliation_string":"School of Engineering, The University of Newcastle, Callaghan, NSW, Australia","institution_ids":["https://openalex.org/I78757542"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100370053","display_name":"Zhiyong Chen","orcid":"https://orcid.org/0000-0002-2033-4249"},"institutions":[{"id":"https://openalex.org/I78757542","display_name":"University of Newcastle Australia","ror":"https://ror.org/00eae9z71","country_code":"AU","type":"education","lineage":["https://openalex.org/I78757542"]}],"countries":["AU"],"is_corresponding":false,"raw_author_name":"Zhiyong Chen","raw_affiliation_strings":["School of Engineering, The University of Newcastle, Callaghan, NSW, Australia"],"raw_orcid":"https://orcid.org/0000-0002-2033-4249","affiliations":[{"raw_affiliation_string":"School of Engineering, The University of Newcastle, Callaghan, NSW, Australia","institution_ids":["https://openalex.org/I78757542"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5008309887","display_name":"Nasimul Noman","orcid":"https://orcid.org/0000-0002-8566-0870"},"institutions":[{"id":"https://openalex.org/I78757542","display_name":"University of Newcastle Australia","ror":"https://ror.org/00eae9z71","country_code":"AU","type":"education","lineage":["https://openalex.org/I78757542"]}],"countries":["AU"],"is_corresponding":false,"raw_author_name":"Nasimul Noman","raw_affiliation_strings":["School of Information and Physical Sciences, The University of Newcastle, Callaghan, NSW, Australia"],"raw_orcid":"https://orcid.org/0000-0002-8566-0870","affiliations":[{"raw_affiliation_string":"School of Information and Physical Sciences, The University of Newcastle, Callaghan, NSW, Australia","institution_ids":["https://openalex.org/I78757542"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":6.7978,"has_fulltext":false,"cited_by_count":53,"citation_normalized_percentile":{"value":0.97259188,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":94,"max":100},"biblio":{"volume":"35","issue":"3","first_page":"3121","last_page":"3129"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9983999729156494,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9983999729156494,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10603","display_name":"Smart Grid Energy Management","score":0.9828000068664551,"subfield":{"id":"https://openalex.org/subfields/2208","display_name":"Electrical and Electronic Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T13219","display_name":"Mind wandering and attention","score":0.9480000138282776,"subfield":{"id":"https://openalex.org/subfields/2805","display_name":"Cognitive Neuroscience"},"field":{"id":"https://openalex.org/fields/28","display_name":"Neuroscience"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7156437039375305},{"id":"https://openalex.org/keywords/randomness","display_name":"Randomness","score":0.6241356134414673},{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.6119093298912048},{"id":"https://openalex.org/keywords/bellman-equation","display_name":"Bellman equation","score":0.4902244210243225},{"id":"https://openalex.org/keywords/benchmark","display_name":"Benchmark (surveying)","score":0.44718775153160095},{"id":"https://openalex.org/keywords/mathematical-optimization","display_name":"Mathematical optimization","score":0.3769274652004242},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.32437625527381897},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.3214157223701477},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.18919101357460022},{"id":"https://openalex.org/keywords/statistics","display_name":"Statistics","score":0.11774489283561707}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7156437039375305},{"id":"https://openalex.org/C125112378","wikidata":"https://www.wikidata.org/wiki/Q176640","display_name":"Randomness","level":2,"score":0.6241356134414673},{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.6119093298912048},{"id":"https://openalex.org/C14646407","wikidata":"https://www.wikidata.org/wiki/Q1430750","display_name":"Bellman equation","level":2,"score":0.4902244210243225},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.44718775153160095},{"id":"https://openalex.org/C126255220","wikidata":"https://www.wikidata.org/wiki/Q141495","display_name":"Mathematical optimization","level":1,"score":0.3769274652004242},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.32437625527381897},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.3214157223701477},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.18919101357460022},{"id":"https://openalex.org/C105795698","wikidata":"https://www.wikidata.org/wiki/Q12483","display_name":"Statistics","level":1,"score":0.11774489283561707},{"id":"https://openalex.org/C205649164","wikidata":"https://www.wikidata.org/wiki/Q1071","display_name":"Geography","level":0,"score":0.0},{"id":"https://openalex.org/C13280743","wikidata":"https://www.wikidata.org/wiki/Q131089","display_name":"Geodesy","level":1,"score":0.0}],"mesh":[],"locations_count":3,"locations":[{"id":"doi:10.1109/tnnls.2022.3174051","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tnnls.2022.3174051","pdf_url":null,"source":{"id":"https://openalex.org/S4210175523","display_name":"IEEE Transactions on Neural Networks and Learning Systems","issn_l":"2162-237X","issn":["2162-237X","2162-2388"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Neural Networks and Learning Systems","raw_type":"journal-article"},{"id":"pmid:35588412","is_oa":false,"landing_page_url":"https://pubmed.ncbi.nlm.nih.gov/35588412","pdf_url":null,"source":{"id":"https://openalex.org/S4306525036","display_name":"PubMed","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1299303238","host_organization_name":"National Institutes of Health","host_organization_lineage":["https://openalex.org/I1299303238"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE transactions on neural networks and learning systems","raw_type":null},{"id":"pmh:uon:55038","is_oa":false,"landing_page_url":"http://hdl.handle.net/1959.13/1500924","pdf_url":null,"source":{"id":"https://openalex.org/S4377196471","display_name":"NOVA (University of Newcastle Australia)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I78757542","host_organization_name":"University of Newcastle Australia","host_organization_lineage":["https://openalex.org/I78757542"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"journal article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":68,"referenced_works":["https://openalex.org/W1522301498","https://openalex.org/W2145339207","https://openalex.org/W2155007355","https://openalex.org/W2158782408","https://openalex.org/W2201581102","https://openalex.org/W2609650878","https://openalex.org/W2736601468","https://openalex.org/W2739330054","https://openalex.org/W2746553466","https://openalex.org/W2761873684","https://openalex.org/W2775482448","https://openalex.org/W2789901741","https://openalex.org/W2885541157","https://openalex.org/W2904246096","https://openalex.org/W2907537824","https://openalex.org/W2946901134","https://openalex.org/W2948708918","https://openalex.org/W2963095800","https://openalex.org/W2963296584","https://openalex.org/W2963582482","https://openalex.org/W2963864421","https://openalex.org/W2963959137","https://openalex.org/W2964082094","https://openalex.org/W2964114602","https://openalex.org/W2979229105","https://openalex.org/W2982316857","https://openalex.org/W2994803162","https://openalex.org/W2998069546","https://openalex.org/W3004752404","https://openalex.org/W3035954878","https://openalex.org/W3036625561","https://openalex.org/W3037849017","https://openalex.org/W3106491339","https://openalex.org/W4300799055","https://openalex.org/W4302282707","https://openalex.org/W6631190155","https://openalex.org/W6638018090","https://openalex.org/W6682849425","https://openalex.org/W6683195989","https://openalex.org/W6683204974","https://openalex.org/W6683300800","https://openalex.org/W6684205842","https://openalex.org/W6684921986","https://openalex.org/W6685444567","https://openalex.org/W6687681856","https://openalex.org/W6692846177","https://openalex.org/W6730111887","https://openalex.org/W6736495777","https://openalex.org/W6740801417","https://openalex.org/W6741002519","https://openalex.org/W6747473740","https://openalex.org/W6747682777","https://openalex.org/W6748554570","https://openalex.org/W6748638692","https://openalex.org/W6748839928","https://openalex.org/W6749892895","https://openalex.org/W6753199549","https://openalex.org/W6758315252","https://openalex.org/W6762935222","https://openalex.org/W6763177082","https://openalex.org/W6763213874","https://openalex.org/W6767047803","https://openalex.org/W6768350423","https://openalex.org/W6770456248","https://openalex.org/W6772562993","https://openalex.org/W6779402086","https://openalex.org/W6779982523","https://openalex.org/W6780559895"],"related_works":["https://openalex.org/W3034924094","https://openalex.org/W3094954546","https://openalex.org/W1488708774","https://openalex.org/W1982811510","https://openalex.org/W4391100477","https://openalex.org/W2402189625","https://openalex.org/W2386410636","https://openalex.org/W3038962357","https://openalex.org/W2025663273","https://openalex.org/W3099153698"],"abstract_inverted_index":{"Soft":[0],"actor-critic":[1,6],"(SAC)":[2],"is":[3,173],"an":[4,57],"off-policy":[5,52,58,133],"(AC)":[7],"reinforcement":[8],"learning":[9],"(RL)":[10],"algorithm,":[11],"essentially":[12],"based":[13],"on":[14,40,182],"entropy":[15,29],"regularization.":[16],"SAC":[17,54,95,106,155,161],"trains":[18],"a":[19,41,70,111,128,183],"policy":[20,78,143],"by":[21],"maximizing":[22],"the":[23,32,37,74,77,92,120,131,136,142,153,168],"trade-off":[24],"between":[25],"expected":[26],"return":[27],"and":[28,51,79,96,144,156,162,177],"(randomness":[30],"in":[31,56,69,189],"policy).":[33],"It":[34,172],"has":[35],"achieved":[36],"state-of-the-art":[38],"performance":[39,93],"range":[42],"of":[43,76,94,130,160,185],"continuous":[44,186],"control":[45,187],"benchmark":[46],"tasks,":[47],"outperforming":[48],"prior":[49],"on-policy":[50,138],"methods.":[53],"works":[55],"fashion":[59],"where":[60],"data":[61,134,139],"are":[62,83],"sampled":[63],"uniformly":[64],"from":[65,119],"past":[66],"experiences":[67],"(stored":[68],"buffer)":[71],"using":[72],"which":[73],"parameters":[75],"value":[80,145],"function":[81,146],"networks":[82],"updated.":[84],"We":[85,148],"propose":[86],"certain":[87],"crucial":[88],"modifications":[89],"for":[90,115,140],"boosting":[91],"making":[97],"it":[98],"more":[99,175],"sample":[100,178],"efficient.":[101],"In":[102],"our":[103,150,165],"proposed":[104],"improved":[105],"(ISAC),":[107],"we":[108,126],"first":[109],"introduce":[110],"new":[112],"prioritization":[113],"scheme":[114],"selecting":[116],"better":[117],"samples":[118],"experience":[121],"replay":[122],"(ER)":[123],"buffer.":[124],"Second":[125],"use":[127],"mixture":[129],"prioritized":[132],"with":[135,152],"latest":[137],"training":[141],"networks.":[147],"compare":[149],"approach":[151,166],"vanilla":[154],"some":[157],"recent":[158],"variants":[159],"show":[163],"that":[164],"outperforms":[167],"said":[169],"algorithmic":[170],"benchmarks.":[171],"comparatively":[174],"stable":[176],"efficient":[179],"when":[180],"tested":[181],"number":[184],"tasks":[188],"MuJoCo":[190],"environments.":[191]},"counts_by_year":[{"year":2026,"cited_by_count":4},{"year":2025,"cited_by_count":20},{"year":2024,"cited_by_count":18},{"year":2023,"cited_by_count":9},{"year":2022,"cited_by_count":2}],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2025-10-10T00:00:00"}
