{"id":"https://openalex.org/W4400944787","doi":"https://doi.org/10.1109/tiv.2024.3432891","title":"Multi-Style Distributional Soft Actor-Critic: Learning a Unified Policy for Diverse Control Behaviors","display_name":"Multi-Style Distributional Soft Actor-Critic: Learning a Unified Policy for Diverse Control Behaviors","publication_year":2024,"publication_date":"2024-07-24","ids":{"openalex":"https://openalex.org/W4400944787","doi":"https://doi.org/10.1109/tiv.2024.3432891"},"language":"en","primary_location":{"id":"doi:10.1109/tiv.2024.3432891","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tiv.2024.3432891","pdf_url":null,"source":{"id":"https://openalex.org/S4210199657","display_name":"IEEE Transactions on Intelligent Vehicles","issn_l":"2379-8858","issn":["2379-8858","2379-8904"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Intelligent Vehicles","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5028599970","display_name":"Liming Xiao","orcid":"https://orcid.org/0009-0006-4566-1103"},"institutions":[{"id":"https://openalex.org/I92403157","display_name":"University of Science and Technology Beijing","ror":"https://ror.org/02egmk993","country_code":"CN","type":"education","lineage":["https://openalex.org/I92403157"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Liming Xiao","raw_affiliation_strings":["School of Mechanical Engineering, University of Science and Technology Beijing, Beijing, China","School of Mechanical Engineering, University of Science and Technology, Beijing, China"],"raw_orcid":"https://orcid.org/0009-0006-4566-1103","affiliations":[{"raw_affiliation_string":"School of Mechanical Engineering, University of Science and Technology Beijing, Beijing, China","institution_ids":["https://openalex.org/I92403157"]},{"raw_affiliation_string":"School of Mechanical Engineering, University of Science and Technology, Beijing, China","institution_ids":["https://openalex.org/I92403157"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5083537762","display_name":"Yao Lyu","orcid":"https://orcid.org/0000-0003-1539-472X"},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yao Lyu","raw_affiliation_strings":["School of Vehicle and Mobility, Tsinghua University, Beijing, China"],"raw_orcid":"https://orcid.org/0000-0003-1539-472X","affiliations":[{"raw_affiliation_string":"School of Vehicle and Mobility, Tsinghua University, Beijing, China","institution_ids":["https://openalex.org/I99065089"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5103031147","display_name":"Fawang Zhang","orcid":"https://orcid.org/0009-0002-7455-9694"},"institutions":[{"id":"https://openalex.org/I125839683","display_name":"Beijing Institute of Technology","ror":"https://ror.org/01skt4w74","country_code":"CN","type":"education","lineage":["https://openalex.org/I125839683","https://openalex.org/I890469752"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Fawang Zhang","raw_affiliation_strings":["School of Mechanical Engineering, Beijing Institute of Technology, Beijing, China"],"raw_orcid":"https://orcid.org/0009-0002-7455-9694","affiliations":[{"raw_affiliation_string":"School of Mechanical Engineering, Beijing Institute of Technology, Beijing, China","institution_ids":["https://openalex.org/I125839683"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5026361465","display_name":"Liangfa Chen","orcid":null},"institutions":[{"id":"https://openalex.org/I92403157","display_name":"University of Science and Technology Beijing","ror":"https://ror.org/02egmk993","country_code":"CN","type":"education","lineage":["https://openalex.org/I92403157"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Liangfa Chen","raw_affiliation_strings":["School of Mechanical Engineering, University of Science and Technology Beijing, Beijing, China","School of Mechanical Engineering, University of Science and Technology, Beijing, China"],"raw_orcid":"https://orcid.org/0009-0002-8442-3205","affiliations":[{"raw_affiliation_string":"School of Mechanical Engineering, University of Science and Technology Beijing, Beijing, China","institution_ids":["https://openalex.org/I92403157"]},{"raw_affiliation_string":"School of Mechanical Engineering, University of Science and Technology, Beijing, China","institution_ids":["https://openalex.org/I92403157"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5102988959","display_name":"Guangyuan Yu","orcid":null},"institutions":[{"id":"https://openalex.org/I92403157","display_name":"University of Science and Technology Beijing","ror":"https://ror.org/02egmk993","country_code":"CN","type":"education","lineage":["https://openalex.org/I92403157"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Guangyuan Yu","raw_affiliation_strings":["School of Mechanical Engineering, University of Science and Technology Beijing, Beijing, China","School of Mechanical Engineering, University of Science and Technology, Beijing, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"School of Mechanical Engineering, University of Science and Technology Beijing, Beijing, China","institution_ids":["https://openalex.org/I92403157"]},{"raw_affiliation_string":"School of Mechanical Engineering, University of Science and Technology, Beijing, China","institution_ids":["https://openalex.org/I92403157"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100747108","display_name":"Shengbo Eben Li","orcid":"https://orcid.org/0000-0003-4923-3633"},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Shengbo Eben Li","raw_affiliation_strings":["School of Vehicle and Mobility, Tsinghua University, Beijing, China"],"raw_orcid":"https://orcid.org/0000-0003-4923-3633","affiliations":[{"raw_affiliation_string":"School of Vehicle and Mobility, Tsinghua University, Beijing, China","institution_ids":["https://openalex.org/I99065089"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5007755391","display_name":"Fei Ma","orcid":"https://orcid.org/0000-0002-3184-9520"},"institutions":[{"id":"https://openalex.org/I92403157","display_name":"University of Science and Technology Beijing","ror":"https://ror.org/02egmk993","country_code":"CN","type":"education","lineage":["https://openalex.org/I92403157"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Fei Ma","raw_affiliation_strings":["School of Mechanical Engineering, University of Science and Technology Beijing, Beijing, China","School of Mechanical Engineering, University of Science and Technology, Beijing, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"School of Mechanical Engineering, University of Science and Technology Beijing, Beijing, China","institution_ids":["https://openalex.org/I92403157"]},{"raw_affiliation_string":"School of Mechanical Engineering, University of Science and Technology, Beijing, China","institution_ids":["https://openalex.org/I92403157"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5067909017","display_name":"Jingliang Duan","orcid":"https://orcid.org/0000-0002-3697-1576"},"institutions":[{"id":"https://openalex.org/I92403157","display_name":"University of Science and Technology Beijing","ror":"https://ror.org/02egmk993","country_code":"CN","type":"education","lineage":["https://openalex.org/I92403157"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jingliang Duan","raw_affiliation_strings":["School of Mechanical Engineering, University of Science and Technology Beijing, Beijing, China","School of Mechanical Engineering, University of Science and Technology, Beijing, China"],"raw_orcid":"https://orcid.org/0000-0002-3697-1576","affiliations":[{"raw_affiliation_string":"School of Mechanical Engineering, University of Science and Technology Beijing, Beijing, China","institution_ids":["https://openalex.org/I92403157"]},{"raw_affiliation_string":"School of Mechanical Engineering, University of Science and Technology, Beijing, China","institution_ids":["https://openalex.org/I92403157"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":8,"corresponding_author_ids":["https://openalex.org/A5028599970"],"corresponding_institution_ids":["https://openalex.org/I92403157"],"apc_list":null,"apc_paid":null,"fwci":2.1634,"has_fulltext":false,"cited_by_count":2,"citation_normalized_percentile":{"value":0.9151782,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":95,"max":96},"biblio":{"volume":"10","issue":"3","first_page":"1759","last_page":"1770"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11270","display_name":"Complex Systems and Time Series Analysis","score":0.32899999618530273,"subfield":{"id":"https://openalex.org/subfields/2002","display_name":"Economics and Econometrics"},"field":{"id":"https://openalex.org/fields/20","display_name":"Economics, Econometrics and Finance"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},"topics":[{"id":"https://openalex.org/T11270","display_name":"Complex Systems and Time Series Analysis","score":0.32899999618530273,"subfield":{"id":"https://openalex.org/subfields/2002","display_name":"Economics and Econometrics"},"field":{"id":"https://openalex.org/fields/20","display_name":"Economics, Econometrics and Finance"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T11031","display_name":"Game Theory and Applications","score":0.302700012922287,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/style","display_name":"Style (visual arts)","score":0.7411909103393555},{"id":"https://openalex.org/keywords/control","display_name":"Control (management)","score":0.5508614778518677},{"id":"https://openalex.org/keywords/policy-learning","display_name":"Policy learning","score":0.5259292721748352},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.43197891116142273},{"id":"https://openalex.org/keywords/psychology","display_name":"Psychology","score":0.3418791890144348},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.25346291065216064},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.12597358226776123},{"id":"https://openalex.org/keywords/art","display_name":"Art","score":0.12304788827896118},{"id":"https://openalex.org/keywords/visual-arts","display_name":"Visual arts","score":0.06259819865226746}],"concepts":[{"id":"https://openalex.org/C2776445246","wikidata":"https://www.wikidata.org/wiki/Q1792644","display_name":"Style (visual arts)","level":2,"score":0.7411909103393555},{"id":"https://openalex.org/C2775924081","wikidata":"https://www.wikidata.org/wiki/Q55608371","display_name":"Control (management)","level":2,"score":0.5508614778518677},{"id":"https://openalex.org/C2779436431","wikidata":"https://www.wikidata.org/wiki/Q30672407","display_name":"Policy learning","level":2,"score":0.5259292721748352},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.43197891116142273},{"id":"https://openalex.org/C15744967","wikidata":"https://www.wikidata.org/wiki/Q9418","display_name":"Psychology","level":0,"score":0.3418791890144348},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.25346291065216064},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.12597358226776123},{"id":"https://openalex.org/C142362112","wikidata":"https://www.wikidata.org/wiki/Q735","display_name":"Art","level":0,"score":0.12304788827896118},{"id":"https://openalex.org/C153349607","wikidata":"https://www.wikidata.org/wiki/Q36649","display_name":"Visual arts","level":1,"score":0.06259819865226746}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/tiv.2024.3432891","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tiv.2024.3432891","pdf_url":null,"source":{"id":"https://openalex.org/S4210199657","display_name":"IEEE Transactions on Intelligent Vehicles","issn_l":"2379-8858","issn":["2379-8858","2379-8904"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Intelligent Vehicles","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G1267632332","display_name":null,"funder_award_id":"FRF-OT-23-02","funder_id":"https://openalex.org/F4320335787","funder_display_name":"Fundamental Research Funds for the Central Universities"}],"funders":[{"id":"https://openalex.org/F4320335787","display_name":"Fundamental Research Funds for the Central Universities","ror":null}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":42,"referenced_works":["https://openalex.org/W1527702126","https://openalex.org/W2158782408","https://openalex.org/W2521921275","https://openalex.org/W2604382266","https://openalex.org/W2765302304","https://openalex.org/W2951360122","https://openalex.org/W2963403593","https://openalex.org/W3000638052","https://openalex.org/W3015082424","https://openalex.org/W3114647763","https://openalex.org/W3176912151","https://openalex.org/W4210434594","https://openalex.org/W4213443150","https://openalex.org/W4224231583","https://openalex.org/W4225773298","https://openalex.org/W4283823813","https://openalex.org/W4317795250","https://openalex.org/W4323897042","https://openalex.org/W4362650357","https://openalex.org/W4366158867","https://openalex.org/W4376481351","https://openalex.org/W4383109371","https://openalex.org/W4385065411","https://openalex.org/W4389430479","https://openalex.org/W4389666601","https://openalex.org/W4390422151","https://openalex.org/W4392406006","https://openalex.org/W4393160649","https://openalex.org/W4394006698","https://openalex.org/W4404782462","https://openalex.org/W4406983280","https://openalex.org/W6638018090","https://openalex.org/W6683300800","https://openalex.org/W6684921986","https://openalex.org/W6734517396","https://openalex.org/W6747473740","https://openalex.org/W6748839928","https://openalex.org/W6750645735","https://openalex.org/W6751629939","https://openalex.org/W6757592117","https://openalex.org/W6780559895","https://openalex.org/W6922480057"],"related_works":["https://openalex.org/W4391375266","https://openalex.org/W2748952813","https://openalex.org/W2390279801","https://openalex.org/W2358668433","https://openalex.org/W4396701345","https://openalex.org/W2376932109","https://openalex.org/W2001405890","https://openalex.org/W4396696052","https://openalex.org/W2382290278","https://openalex.org/W4395014643"],"abstract_inverted_index":{"Reinforcement":[0],"learning":[1,75],"(RL)":[2],"has":[3],"excelled":[4],"in":[5,24],"sequential":[6],"decision-making":[7],"and":[8,144,187,226],"control":[9,22,31,47,82,212,229],"tasks,":[10],"yet":[11],"traditional":[12],"RL":[13,35],"algorithms":[14],"are":[15,58],"limited":[16],"by":[17,86,181],"adherence":[18],"to":[19,28,45,167],"a":[20,76,88,134,162,203,209],"single":[21,77],"style":[23,135],"identical":[25],"scenarios,":[26],"failing":[27],"address":[29],"varied":[30],"preferences.":[32],"Existing":[33],"multi-style":[34,67,89],"methods":[36],"typically":[37],"require":[38],"customized":[39],"reward":[40],"or":[41],"objective":[42],"functions":[43],"tailored":[44],"specific":[46],"styles,":[48],"which":[49],"may":[50],"not":[51],"be":[52],"feasible":[53],"when":[54],"diverse":[55],"driving":[56],"styles":[57,213],"necessary.":[59],"To":[60],"overcome":[61],"these":[62,150],"limitations,":[63],"we":[64,124],"propose":[65],"the":[66,96,103,111,115,126,130,138,142,155,158,169,183,195,215,232],"distributional":[68],"soft":[69],"actor-critic":[70],"(M-DSAC)":[71],"algorithm,":[72],"capable":[73],"of":[74,99,129,140,177,211,217,234],"policy":[78,90,143,196,205],"that":[79,94,206],"supports":[80],"multiple":[81],"behaviors.":[83],"We":[84],"begin":[85],"developing":[87],"iteration":[91],"(MPI)":[92],"framework":[93],"learns":[95,202],"entire":[97],"distribution":[98,132,148],"returns,":[100],"known":[101],"as":[102,133],"value":[104,131,147,170,179,184],"distribution,":[105],"rather":[106],"than":[107],"just":[108],"focusing":[109],"on":[110],"expected":[112],"return":[113],"(i.e.,":[114],"<inline-formula":[116],"xmlns:mml=\"http://www.w3.org/1998/Math/MathML\"":[117],"xmlns:xlink=\"http://www.w3.org/1999/xlink\"><tex-math":[118],"notation=\"LaTeX\">$Q$</tex-math></inline-formula>":[119],"value).":[120],"In":[121],"this":[122],"framework,":[123,157],"utilize":[125],"quantile":[127,151],"index":[128],"indicator,":[136],"enhancing":[137],"inputs":[139],"both":[141],"its":[145,237],"corresponding":[146],"with":[149,190],"indices.":[152],"Building":[153],"upon":[154],"MPI":[156],"M-DSAC":[159,200],"algorithm":[160],"employs":[161],"parameterized":[163],"diagonal":[164],"Gaussian":[165],"function":[166],"approximate":[168],"distribution.":[171],"This":[172],"approach":[173],"enables":[174],"efficient":[175],"computation":[176],"different":[178,198],"quantiles":[180],"combining":[182],"distribution's":[185],"mean":[186],"standard":[188],"deviations":[189],"appropriate":[191],"coefficients.":[192],"By":[193],"optimizing":[194],"across":[197],"quantiles,":[199],"efficiently":[201],"versatile":[204],"can":[207],"handle":[208],"range":[210],"without":[214],"burden":[216],"significant":[218],"computing":[219],"costs.":[220],"Experimental":[221],"evaluations":[222],"using":[223],"MuJoCo":[224],"benchmarks":[225],"real-world":[227],"robot":[228],"tasks":[230],"confirm":[231],"effectiveness":[233],"M-DSAC,":[235],"showcasing":[236],"broad":[238],"practical":[239],"applicability.":[240]},"counts_by_year":[{"year":2025,"cited_by_count":2}],"updated_date":"2026-03-27T05:58:40.876381","created_date":"2025-10-10T00:00:00"}
