{"id":"https://openalex.org/W4406983280","doi":"https://doi.org/10.1109/tpami.2025.3537087","title":"Distributional Soft Actor-Critic With Three Refinements","display_name":"Distributional Soft Actor-Critic With Three Refinements","publication_year":2025,"publication_date":"2025-01-30","ids":{"openalex":"https://openalex.org/W4406983280","doi":"https://doi.org/10.1109/tpami.2025.3537087","pmid":"https://pubmed.ncbi.nlm.nih.gov/40031258"},"language":"en","primary_location":{"id":"doi:10.1109/tpami.2025.3537087","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tpami.2025.3537087","pdf_url":null,"source":{"id":"https://openalex.org/S199944782","display_name":"IEEE Transactions on Pattern Analysis and Machine Intelligence","issn_l":"0162-8828","issn":["0162-8828","1939-3539","2160-9292"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310320439","host_organization_name":"IEEE Computer Society","host_organization_lineage":["https://openalex.org/P4310320439","https://openalex.org/P4310319808"],"host_organization_lineage_names":["IEEE Computer Society","Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Pattern Analysis and Machine Intelligence","raw_type":"journal-article"},"type":"article","indexed_in":["crossref","pubmed"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5067909017","display_name":"Jingliang Duan","orcid":"https://orcid.org/0000-0002-3697-1576"},"institutions":[{"id":"https://openalex.org/I92403157","display_name":"University of Science and Technology Beijing","ror":"https://ror.org/02egmk993","country_code":"CN","type":"education","lineage":["https://openalex.org/I92403157"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Jingliang Duan","raw_affiliation_strings":["School of Mechanical Engineering, University of Science and Technology Beijing, Beijing, China"],"raw_orcid":"https://orcid.org/0000-0002-3697-1576","affiliations":[{"raw_affiliation_string":"School of Mechanical Engineering, University of Science and Technology Beijing, Beijing, China","institution_ids":["https://openalex.org/I92403157"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5084395808","display_name":"Wenxuan Wang","orcid":"https://orcid.org/0000-0002-0193-816X"},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Wenxuan Wang","raw_affiliation_strings":["School of Vehicle and Mobility, Tsinghua University, Beijing, China"],"raw_orcid":"https://orcid.org/0000-0002-0193-816X","affiliations":[{"raw_affiliation_string":"School of Vehicle and Mobility, Tsinghua University, Beijing, China","institution_ids":["https://openalex.org/I99065089"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5028599970","display_name":"Liming Xiao","orcid":"https://orcid.org/0009-0006-4566-1103"},"institutions":[{"id":"https://openalex.org/I92403157","display_name":"University of Science and Technology Beijing","ror":"https://ror.org/02egmk993","country_code":"CN","type":"education","lineage":["https://openalex.org/I92403157"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Liming Xiao","raw_affiliation_strings":["School of Mechanical Engineering, University of Science and Technology Beijing, Beijing, China"],"raw_orcid":"https://orcid.org/0009-0006-4566-1103","affiliations":[{"raw_affiliation_string":"School of Mechanical Engineering, University of Science and Technology Beijing, Beijing, China","institution_ids":["https://openalex.org/I92403157"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5034999413","display_name":"Jiaxin Gao","orcid":"https://orcid.org/0000-0001-5455-0620"},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jiaxin Gao","raw_affiliation_strings":["School of Vehicle and Mobility, Tsinghua University, Beijing, China"],"raw_orcid":"https://orcid.org/0000-0001-5455-0620","affiliations":[{"raw_affiliation_string":"School of Vehicle and Mobility, Tsinghua University, Beijing, China","institution_ids":["https://openalex.org/I99065089"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100747108","display_name":"Shengbo Eben Li","orcid":"https://orcid.org/0000-0003-4923-3633"},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Shengbo Eben Li","raw_affiliation_strings":["School of Vehicle and Mobility and College of AI, Tsinghua University, Beijing, China","School of Vehicle and Mobility &amp; College of AI, Tsinghua University, Beijing, China"],"raw_orcid":"https://orcid.org/0000-0003-4923-3633","affiliations":[{"raw_affiliation_string":"School of Vehicle and Mobility and College of AI, Tsinghua University, Beijing, China","institution_ids":["https://openalex.org/I99065089"]},{"raw_affiliation_string":"School of Vehicle and Mobility &amp; College of AI, Tsinghua University, Beijing, China","institution_ids":["https://openalex.org/I99065089"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100353166","display_name":"Chang Liu","orcid":"https://orcid.org/0000-0001-7686-2510"},"institutions":[{"id":"https://openalex.org/I20231570","display_name":"Peking University","ror":"https://ror.org/02v51f717","country_code":"CN","type":"education","lineage":["https://openalex.org/I20231570"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Chang Liu","raw_affiliation_strings":["College of Engineering, Peking University, Beijing, China"],"raw_orcid":"https://orcid.org/0000-0001-7686-2510","affiliations":[{"raw_affiliation_string":"College of Engineering, Peking University, Beijing, China","institution_ids":["https://openalex.org/I20231570"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Ya-Qin Zhang","orcid":"https://orcid.org/0000-0003-4515-6212"},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Ya-Qin Zhang","raw_affiliation_strings":["Institute for AI Industry Research, Tsinghua University, Beijing, China"],"raw_orcid":"https://orcid.org/0000-0003-4515-6212","affiliations":[{"raw_affiliation_string":"Institute for AI Industry Research, Tsinghua University, Beijing, China","institution_ids":["https://openalex.org/I99065089"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100640936","display_name":"Bo Cheng","orcid":"https://orcid.org/0000-0002-1753-2922"},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Bo Cheng","raw_affiliation_strings":["School of Vehicle and Mobility, Tsinghua University, Beijing, China"],"raw_orcid":"https://orcid.org/0000-0002-1753-2922","affiliations":[{"raw_affiliation_string":"School of Vehicle and Mobility, Tsinghua University, Beijing, China","institution_ids":["https://openalex.org/I99065089"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5031855986","display_name":"Keqiang Li","orcid":"https://orcid.org/0000-0002-9333-7416"},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Keqiang Li","raw_affiliation_strings":["School of Vehicle and Mobility, Tsinghua University, Beijing, China"],"raw_orcid":"https://orcid.org/0000-0002-9333-7416","affiliations":[{"raw_affiliation_string":"School of Vehicle and Mobility, Tsinghua University, Beijing, China","institution_ids":["https://openalex.org/I99065089"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":9,"corresponding_author_ids":["https://openalex.org/A5067909017"],"corresponding_institution_ids":["https://openalex.org/I92403157"],"apc_list":null,"apc_paid":null,"fwci":60.7009,"has_fulltext":false,"cited_by_count":53,"citation_normalized_percentile":{"value":0.99926081,"is_in_top_1_percent":true,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":94,"max":100},"biblio":{"volume":"47","issue":"5","first_page":"3935","last_page":"3946"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12002","display_name":"Computability, Logic, AI Algorithms","score":0.5968000292778015,"subfield":{"id":"https://openalex.org/subfields/1703","display_name":"Computational Theory and Mathematics"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T12002","display_name":"Computability, Logic, AI Algorithms","score":0.5968000292778015,"subfield":{"id":"https://openalex.org/subfields/1703","display_name":"Computational Theory and Mathematics"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.645104169845581},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.6210916638374329},{"id":"https://openalex.org/keywords/computer-vision","display_name":"Computer vision","score":0.43587127327919006}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.645104169845581},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6210916638374329},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.43587127327919006}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1109/tpami.2025.3537087","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tpami.2025.3537087","pdf_url":null,"source":{"id":"https://openalex.org/S199944782","display_name":"IEEE Transactions on Pattern Analysis and Machine Intelligence","issn_l":"0162-8828","issn":["0162-8828","1939-3539","2160-9292"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310320439","host_organization_name":"IEEE Computer Society","host_organization_lineage":["https://openalex.org/P4310320439","https://openalex.org/P4310319808"],"host_organization_lineage_names":["IEEE Computer Society","Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Pattern Analysis and Machine Intelligence","raw_type":"journal-article"},{"id":"pmid:40031258","is_oa":false,"landing_page_url":"https://pubmed.ncbi.nlm.nih.gov/40031258","pdf_url":null,"source":{"id":"https://openalex.org/S4306525036","display_name":"PubMed","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1299303238","host_organization_name":"National Institutes of Health","host_organization_lineage":["https://openalex.org/I1299303238"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE transactions on pattern analysis and machine intelligence","raw_type":null}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G1267632332","display_name":null,"funder_award_id":"FRF-OT-23-02","funder_id":"https://openalex.org/F4320335787","funder_display_name":"Fundamental Research Funds for the Central Universities"}],"funders":[{"id":"https://openalex.org/F4320335787","display_name":"Fundamental Research Funds for the Central Universities","ror":null}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":38,"referenced_works":["https://openalex.org/W2145339207","https://openalex.org/W2746553466","https://openalex.org/W2765302304","https://openalex.org/W2766447205","https://openalex.org/W2951360122","https://openalex.org/W2999778183","https://openalex.org/W3015082424","https://openalex.org/W3114647763","https://openalex.org/W3211345831","https://openalex.org/W4286901586","https://openalex.org/W4322729780","https://openalex.org/W4362650413","https://openalex.org/W4366158867","https://openalex.org/W4376481351","https://openalex.org/W4387415195","https://openalex.org/W4389044710","https://openalex.org/W4391019860","https://openalex.org/W4391427676","https://openalex.org/W4394006698","https://openalex.org/W4400944787","https://openalex.org/W6638018090","https://openalex.org/W6683300800","https://openalex.org/W6684921986","https://openalex.org/W6734517396","https://openalex.org/W6747473740","https://openalex.org/W6748839928","https://openalex.org/W6750645735","https://openalex.org/W6751629939","https://openalex.org/W6757592117","https://openalex.org/W6759679099","https://openalex.org/W6762639926","https://openalex.org/W6763153117","https://openalex.org/W6763305696","https://openalex.org/W6767112054","https://openalex.org/W6780559895","https://openalex.org/W6781014336","https://openalex.org/W6794416235","https://openalex.org/W6922480057"],"related_works":["https://openalex.org/W2772917594","https://openalex.org/W2036807459","https://openalex.org/W2058170566","https://openalex.org/W2755342338","https://openalex.org/W2166024367","https://openalex.org/W3116076068","https://openalex.org/W2229312674","https://openalex.org/W2951359407","https://openalex.org/W2079911747","https://openalex.org/W1969923398"],"abstract_inverted_index":{"Reinforcement":[0],"learning":[1,62,179],"(RL)":[2],"has":[3],"shown":[4],"remarkable":[5],"success":[6],"in":[7,87,170,197,207],"solving":[8],"complex":[9],"decision-making":[10],"and":[11,78,108,121,168,181],"control":[12],"tasks.":[13,146,210],"However,":[14],"many":[15],"model-free":[16,160],"RL":[17,54,161],"algorithms":[18],"experience":[19],"performance":[20,184],"degradation":[21],"due":[22,90],"to":[23,35,80,91,102,104],"inaccurate":[24],"value":[25,58,66,115,118],"estimation,":[26],"particularly":[27],"the":[28,45,148],"overestimation":[29],"of":[30,144],"Q-values,":[31],"which":[32],"can":[33],"lead":[34],"suboptimal":[36],"policies.":[37],"To":[38],"address":[39],"this":[40,95],"issue,":[41],"we":[42,97],"previously":[43],"proposed":[44],"Distributional":[46],"Soft":[47],"Actor-Critic":[48],"(DSAC":[49],"or":[50,135,157],"DSACv1),":[51],"an":[52],"off-policy":[53],"algorithm":[55],"that":[56],"enhances":[57],"estimation":[59,112],"accuracy":[60],"by":[61,84],"a":[63,141,177,199],"continuous":[64],"Gaussian":[65],"distribution.":[67],"Despite":[68],"its":[69,203],"effectiveness,":[70],"DSACv1":[71,103],"faces":[72],"challenges":[73],"such":[74],"as":[75],"training":[76],"instability":[77],"sensitivity":[79],"reward":[81,187],"scaling,":[82],"caused":[83],"high":[85],"variance":[86],"critic":[88,123],"gradients":[89],"return":[92],"randomness.":[93],"In":[94],"paper,":[96],"introduce":[98],"three":[99],"key":[100],"refinements":[101,133],"overcome":[105],"these":[106],"limitations":[107],"further":[109,192],"improve":[110],"Q-value":[111],"accuracy:":[113],"expected":[114],"substitution,":[116],"twin":[117],"distribution":[119],"learning,":[120],"variance-based":[122],"gradient":[124],"adjustment.":[125],"The":[126],"enhanced":[127],"algorithm,":[128],"termed":[129],"DSAC":[130],"with":[131],"Three":[132],"(DSAC-T":[134],"DSACv2),":[136],"is":[137,191],"systematically":[138],"evaluated":[139],"across":[140,185],"diverse":[142],"set":[143],"benchmark":[145],"Without":[147],"need":[149],"for":[150,205],"task-specific":[151],"hyperparameter":[152],"tuning,":[153],"DSAC-T":[154,175],"consistently":[155],"matches":[156],"outperforms":[158],"leading":[159],"algorithms,":[162],"including":[163],"SAC,":[164],"TD3,":[165],"DDPG,":[166],"TRPO,":[167],"PPO,":[169],"all":[171],"tested":[172],"environments.":[173],"Additionally,":[174],"ensures":[176],"stable":[178],"process":[180],"maintains":[182],"robust":[183],"varying":[186],"scales.":[188],"Its":[189],"effectiveness":[190],"demonstrated":[193],"through":[194],"real-world":[195],"application":[196],"controlling":[198],"wheeled":[200],"robot,":[201],"highlighting":[202],"potential":[204],"deployment":[206],"practical":[208],"robotic":[209]},"counts_by_year":[{"year":2026,"cited_by_count":19},{"year":2025,"cited_by_count":32},{"year":2024,"cited_by_count":2}],"updated_date":"2026-05-21T09:19:25.381259","created_date":"2025-10-10T00:00:00"}
