{"id":"https://openalex.org/W3152346276","doi":"https://doi.org/10.1155/2021/6658724","title":"Averaged Soft Actor\u2010Critic for Deep Reinforcement Learning","display_name":"Averaged Soft Actor\u2010Critic for Deep Reinforcement Learning","publication_year":2021,"publication_date":"2021-01-01","ids":{"openalex":"https://openalex.org/W3152346276","doi":"https://doi.org/10.1155/2021/6658724","mag":"3152346276"},"language":"en","primary_location":{"id":"doi:10.1155/2021/6658724","is_oa":true,"landing_page_url":"https://doi.org/10.1155/2021/6658724","pdf_url":"https://downloads.hindawi.com/journals/complexity/2021/6658724.pdf","source":{"id":"https://openalex.org/S207319443","display_name":"Complexity","issn_l":"1076-2787","issn":["1076-2787","1099-0526"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310319869","host_organization_name":"Hindawi Publishing Corporation","host_organization_lineage":["https://openalex.org/P4310319869"],"host_organization_lineage_names":["Hindawi Publishing Corporation"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Complexity","raw_type":"journal-article"},"type":"article","indexed_in":["crossref","doaj"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://downloads.hindawi.com/journals/complexity/2021/6658724.pdf","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5059568370","display_name":"Feng Ding","orcid":"https://orcid.org/0000-0001-7642-4182"},"institutions":[{"id":"https://openalex.org/I27357992","display_name":"Dalian University of Technology","ror":"https://ror.org/023hj5876","country_code":"CN","type":"education","lineage":["https://openalex.org/I27357992"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Feng Ding","raw_affiliation_strings":["School of Software Technology, Dalian University of Technology, Dalian, China","School of Software Technology, Dalian University of Technology, Dalian"],"raw_orcid":"https://orcid.org/0000-0001-7642-4182","affiliations":[{"raw_affiliation_string":"School of Software Technology, Dalian University of Technology, Dalian, China","institution_ids":["https://openalex.org/I27357992"]},{"raw_affiliation_string":"School of Software Technology, Dalian University of Technology, Dalian","institution_ids":["https://openalex.org/I27357992"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5021462940","display_name":"Guanfeng Ma","orcid":"https://orcid.org/0000-0002-9750-5851"},"institutions":[{"id":"https://openalex.org/I27357992","display_name":"Dalian University of Technology","ror":"https://ror.org/023hj5876","country_code":"CN","type":"education","lineage":["https://openalex.org/I27357992"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Guanfeng Ma","raw_affiliation_strings":["School of Software Technology, Dalian University of Technology, Dalian, China","School of Software Technology, Dalian University of Technology, Dalian"],"raw_orcid":"https://orcid.org/0000-0002-9750-5851","affiliations":[{"raw_affiliation_string":"School of Software Technology, Dalian University of Technology, Dalian, China","institution_ids":["https://openalex.org/I27357992"]},{"raw_affiliation_string":"School of Software Technology, Dalian University of Technology, Dalian","institution_ids":["https://openalex.org/I27357992"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5053156190","display_name":"Zhikui Chen","orcid":"https://orcid.org/0000-0002-9209-2189"},"institutions":[{"id":"https://openalex.org/I27357992","display_name":"Dalian University of Technology","ror":"https://ror.org/023hj5876","country_code":"CN","type":"education","lineage":["https://openalex.org/I27357992"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Zhikui Chen","raw_affiliation_strings":["School of Software Technology, Dalian University of Technology, Dalian, China","School of Software Technology, Dalian University of Technology, Dalian"],"raw_orcid":"https://orcid.org/0000-0002-9209-2189","affiliations":[{"raw_affiliation_string":"School of Software Technology, Dalian University of Technology, Dalian, China","institution_ids":["https://openalex.org/I27357992"]},{"raw_affiliation_string":"School of Software Technology, Dalian University of Technology, Dalian","institution_ids":["https://openalex.org/I27357992"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100781385","display_name":"Jing Gao","orcid":"https://orcid.org/0000-0001-5099-6991"},"institutions":[{"id":"https://openalex.org/I27357992","display_name":"Dalian University of Technology","ror":"https://ror.org/023hj5876","country_code":"CN","type":"education","lineage":["https://openalex.org/I27357992"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Jing Gao","raw_affiliation_strings":["School of Software Technology, Dalian University of Technology, Dalian, China","School of Software Technology, Dalian University of Technology, Dalian"],"raw_orcid":"https://orcid.org/0000-0001-5099-6991","affiliations":[{"raw_affiliation_string":"School of Software Technology, Dalian University of Technology, Dalian, China","institution_ids":["https://openalex.org/I27357992"]},{"raw_affiliation_string":"School of Software Technology, Dalian University of Technology, Dalian","institution_ids":["https://openalex.org/I27357992"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5100432725","display_name":"Peng Li","orcid":"https://orcid.org/0000-0002-7138-430X"},"institutions":[{"id":"https://openalex.org/I27357992","display_name":"Dalian University of Technology","ror":"https://ror.org/023hj5876","country_code":"CN","type":"education","lineage":["https://openalex.org/I27357992"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Peng Li","raw_affiliation_strings":["School of Software Technology, Dalian University of Technology, Dalian, China","School of Software Technology, Dalian University of Technology, Dalian"],"raw_orcid":"https://orcid.org/0000-0002-7138-430X","affiliations":[{"raw_affiliation_string":"School of Software Technology, Dalian University of Technology, Dalian, China","institution_ids":["https://openalex.org/I27357992"]},{"raw_affiliation_string":"School of Software Technology, Dalian University of Technology, Dalian","institution_ids":["https://openalex.org/I27357992"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5100781385"],"corresponding_institution_ids":["https://openalex.org/I27357992"],"apc_list":{"value":2300,"currency":"USD","value_usd":2300},"apc_paid":{"value":2300,"currency":"USD","value_usd":2300},"fwci":2.2389,"has_fulltext":true,"cited_by_count":25,"citation_normalized_percentile":{"value":0.89814206,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":97,"max":99},"biblio":{"volume":"2021","issue":"1","first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11689","display_name":"Adversarial Robustness in Machine Learning","score":0.9901000261306763,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12026","display_name":"Explainable Artificial Intelligence (XAI)","score":0.972000002861023,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.8294723033905029},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6421747803688049},{"id":"https://openalex.org/keywords/stability","display_name":"Stability (learning theory)","score":0.5771760940551758},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5541067123413086},{"id":"https://openalex.org/keywords/process","display_name":"Process (computing)","score":0.4890504777431488},{"id":"https://openalex.org/keywords/scale","display_name":"Scale (ratio)","score":0.4846973419189453},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.4010588228702545},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.370068222284317}],"concepts":[{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.8294723033905029},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6421747803688049},{"id":"https://openalex.org/C112972136","wikidata":"https://www.wikidata.org/wiki/Q7595718","display_name":"Stability (learning theory)","level":2,"score":0.5771760940551758},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5541067123413086},{"id":"https://openalex.org/C98045186","wikidata":"https://www.wikidata.org/wiki/Q205663","display_name":"Process (computing)","level":2,"score":0.4890504777431488},{"id":"https://openalex.org/C2778755073","wikidata":"https://www.wikidata.org/wiki/Q10858537","display_name":"Scale (ratio)","level":2,"score":0.4846973419189453},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.4010588228702545},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.370068222284317},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0}],"mesh":[],"locations_count":3,"locations":[{"id":"doi:10.1155/2021/6658724","is_oa":true,"landing_page_url":"https://doi.org/10.1155/2021/6658724","pdf_url":"https://downloads.hindawi.com/journals/complexity/2021/6658724.pdf","source":{"id":"https://openalex.org/S207319443","display_name":"Complexity","issn_l":"1076-2787","issn":["1076-2787","1099-0526"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310319869","host_organization_name":"Hindawi Publishing Corporation","host_organization_lineage":["https://openalex.org/P4310319869"],"host_organization_lineage_names":["Hindawi Publishing Corporation"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Complexity","raw_type":"journal-article"},{"id":"pmh:oai:RePEc:hin:complx:6658724","is_oa":false,"landing_page_url":"http://downloads.hindawi.com/journals/complexity/2021/6658724.xml","pdf_url":null,"source":{"id":"https://openalex.org/S4306401271","display_name":"RePEc: Research Papers in Economics","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I77793887","host_organization_name":"Federal Reserve Bank of St. Louis","host_organization_lineage":["https://openalex.org/I77793887"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},{"id":"pmh:oai:doaj.org/article:03969153b56d499d950ce34cd5b8e312","is_oa":true,"landing_page_url":"https://doaj.org/article/03969153b56d499d950ce34cd5b8e312","pdf_url":null,"source":{"id":"https://openalex.org/S4306401280","display_name":"DOAJ (DOAJ: Directory of Open Access Journals)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by-sa","license_id":"https://openalex.org/licenses/cc-by-sa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Complexity, Vol 2021 (2021)","raw_type":"article"}],"best_oa_location":{"id":"doi:10.1155/2021/6658724","is_oa":true,"landing_page_url":"https://doi.org/10.1155/2021/6658724","pdf_url":"https://downloads.hindawi.com/journals/complexity/2021/6658724.pdf","source":{"id":"https://openalex.org/S207319443","display_name":"Complexity","issn_l":"1076-2787","issn":["1076-2787","1099-0526"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310319869","host_organization_name":"Hindawi Publishing Corporation","host_organization_lineage":["https://openalex.org/P4310319869"],"host_organization_lineage_names":["Hindawi Publishing Corporation"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Complexity","raw_type":"journal-article"},"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G113823925","display_name":null,"funder_award_id":"61672123","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G1750754954","display_name":null,"funder_award_id":"DUT20LAB136","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G2511742439","display_name":null,"funder_award_id":"DUT20LAB136","funder_id":"https://openalex.org/F4320322725","funder_display_name":"China Scholarship Council"},{"id":"https://openalex.org/G3018656913","display_name":null,"funder_award_id":"61672123","funder_id":"https://openalex.org/F4320322725","funder_display_name":"China Scholarship Council"},{"id":"https://openalex.org/G3193216473","display_name":null,"funder_award_id":"61672123","funder_id":"https://openalex.org/F4320335787","funder_display_name":"Fundamental Research Funds for the Central Universities"},{"id":"https://openalex.org/G3702097174","display_name":null,"funder_award_id":"62002044","funder_id":"https://openalex.org/F4320335787","funder_display_name":"Fundamental Research Funds for the Central Universities"},{"id":"https://openalex.org/G3930664570","display_name":null,"funder_award_id":"61602083","funder_id":"https://openalex.org/F4320335787","funder_display_name":"Fundamental Research Funds for the Central Universities"},{"id":"https://openalex.org/G4330989301","display_name":null,"funder_award_id":"62002044","funder_id":"https://openalex.org/F4320322725","funder_display_name":"China Scholarship Council"},{"id":"https://openalex.org/G4746874540","display_name":null,"funder_award_id":"61602083","funder_id":"https://openalex.org/F4320322725","funder_display_name":"China Scholarship Council"},{"id":"https://openalex.org/G494772637","display_name":null,"funder_award_id":"61602083","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G5430529641","display_name":null,"funder_award_id":"DUT20LAB136","funder_id":"https://openalex.org/F4320335787","funder_display_name":"Fundamental Research Funds for the Central Universities"},{"id":"https://openalex.org/G7883745928","display_name":null,"funder_award_id":"62002044","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"},{"id":"https://openalex.org/F4320322725","display_name":"China Scholarship Council","ror":"https://ror.org/04atp4p48"},{"id":"https://openalex.org/F4320335787","display_name":"Fundamental Research Funds for the Central Universities","ror":null}],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W3152346276.pdf","grobid_xml":"https://content.openalex.org/works/W3152346276.grobid-xml"},"referenced_works_count":8,"referenced_works":["https://openalex.org/W1731081199","https://openalex.org/W2091565802","https://openalex.org/W2125612430","https://openalex.org/W2145339207","https://openalex.org/W2155007355","https://openalex.org/W2999778183","https://openalex.org/W3002044607","https://openalex.org/W6682849425"],"related_works":["https://openalex.org/W4306904969","https://openalex.org/W2138720691","https://openalex.org/W4362501864","https://openalex.org/W4380318855","https://openalex.org/W3084456289","https://openalex.org/W2024136090","https://openalex.org/W4391331176","https://openalex.org/W2031695474","https://openalex.org/W2586732548","https://openalex.org/W3049728571"],"abstract_inverted_index":{"With":[0],"the":[1,4,25,30,50,71,75,89,96,115,123,131,136,139,143,146],"advent":[2],"of":[3,6,29,58,77,99,117,138,145],"era":[5],"artificial":[7,21],"intelligence,":[8],"deep":[9],"reinforcement":[10],"learning":[11],"(DRL)":[12],"has":[13,64],"achieved":[14],"unprecedented":[15],"success":[16],"in":[17,122],"high\u2010dimensional":[18],"and":[19,27,52,110,142],"large\u2010scale":[20],"intelligence":[22],"tasks.":[23],"However,":[24,61],"insecurity":[26],"instability":[28],"DRL":[31],"algorithm":[32,44,84,133,141],"have":[33],"an":[34],"important":[35],"impact":[36],"on":[37],"its":[38],"performance.":[39,112],"The":[40,126],"Soft":[41],"Actor\u2010Critic":[42],"(SAC)":[43],"uses":[45],"advanced":[46],"functions":[47],"to":[48,55,69,104],"update":[49],"policy":[51],"value":[53],"network":[54],"alleviate":[56],"some":[57,65,120],"these":[59],"problems.":[60,66],"SAC":[62,83,140],"still":[63],"In":[67],"order":[68],"reduce":[70],"error":[72],"caused":[73],"by":[74],"overestimation":[76,97],"SAC,":[78],"we":[79],"propose":[80],"a":[81,105],"new":[82],"called":[85],"Averaged\u2010SAC.":[86],"By":[87],"averaging":[88],"previously":[90],"learned":[91],"action\u2010state":[92],"estimates,":[93],"it":[94],"reduces":[95],"problem":[98],"soft":[100],"Q\u2010learning,":[101],"thereby":[102],"contributing":[103],"more":[106],"stable":[107],"training":[108,147],"process":[109],"improving":[111],"We":[113],"evaluate":[114],"performance":[116,137],"Averaged\u2010SAC":[118,132],"through":[119],"games":[121],"MuJoCo":[124],"environment.":[125],"experimental":[127],"results":[128],"show":[129],"that":[130],"effectively":[134],"improves":[135],"stability":[144],"process.":[148]},"counts_by_year":[{"year":2026,"cited_by_count":3},{"year":2025,"cited_by_count":6},{"year":2024,"cited_by_count":6},{"year":2023,"cited_by_count":5},{"year":2022,"cited_by_count":5}],"updated_date":"2026-05-06T08:25:59.206177","created_date":"2025-10-10T00:00:00"}
