{"id":"https://openalex.org/W3216884019","doi":"https://doi.org/10.1109/tnnls.2021.3128666","title":"Empowering the Diversity and Individuality of Option: Residual Soft Option Critic Framework","display_name":"Empowering the Diversity and Individuality of Option: Residual Soft Option Critic Framework","publication_year":2021,"publication_date":"2021-12-01","ids":{"openalex":"https://openalex.org/W3216884019","doi":"https://doi.org/10.1109/tnnls.2021.3128666","mag":"3216884019","pmid":"https://pubmed.ncbi.nlm.nih.gov/34851834"},"language":"en","primary_location":{"id":"doi:10.1109/tnnls.2021.3128666","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tnnls.2021.3128666","pdf_url":null,"source":{"id":"https://openalex.org/S4210175523","display_name":"IEEE Transactions on Neural Networks and Learning Systems","issn_l":"2162-237X","issn":["2162-237X","2162-2388"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Neural Networks and Learning Systems","raw_type":"journal-article"},"type":"article","indexed_in":["crossref","pubmed"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":null,"display_name":"Anjie Zhu","orcid":null},"institutions":[{"id":"https://openalex.org/I150229711","display_name":"University of Electronic Science and Technology of China","ror":"https://ror.org/04qr3zq92","country_code":"CN","type":"education","lineage":["https://openalex.org/I150229711"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Anjie Zhu","raw_affiliation_strings":["Center for Future Media, School of Computer Science and Engineering, University of Electronic Science and Technology of China, Chengdu, China"],"affiliations":[{"raw_affiliation_string":"Center for Future Media, School of Computer Science and Engineering, University of Electronic Science and Technology of China, Chengdu, China","institution_ids":["https://openalex.org/I150229711"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Feiyu Chen","orcid":"https://orcid.org/0000-0002-0928-6899"},"institutions":[{"id":"https://openalex.org/I150229711","display_name":"University of Electronic Science and Technology of China","ror":"https://ror.org/04qr3zq92","country_code":"CN","type":"education","lineage":["https://openalex.org/I150229711"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Feiyu Chen","raw_affiliation_strings":["Center for Future Media, School of Computer Science and Engineering, University of Electronic Science and Technology of China, Chengdu, China"],"affiliations":[{"raw_affiliation_string":"Center for Future Media, School of Computer Science and Engineering, University of Electronic Science and Technology of China, Chengdu, China","institution_ids":["https://openalex.org/I150229711"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Hui Xu","orcid":"https://orcid.org/0000-0003-2081-555X"},"institutions":[{"id":"https://openalex.org/I150229711","display_name":"University of Electronic Science and Technology of China","ror":"https://ror.org/04qr3zq92","country_code":"CN","type":"education","lineage":["https://openalex.org/I150229711"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Hui Xu","raw_affiliation_strings":["Center for Future Media, School of Computer Science and Engineering, University of Electronic Science and Technology of China, Chengdu, China"],"affiliations":[{"raw_affiliation_string":"Center for Future Media, School of Computer Science and Engineering, University of Electronic Science and Technology of China, Chengdu, China","institution_ids":["https://openalex.org/I150229711"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Deqiang Ouyang","orcid":"https://orcid.org/0000-0003-2259-886X"},"institutions":[{"id":"https://openalex.org/I158842170","display_name":"Chongqing University","ror":"https://ror.org/023rhb549","country_code":"CN","type":"education","lineage":["https://openalex.org/I158842170"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Deqiang Ouyang","raw_affiliation_strings":["College of Computer Science, Chongqing University, Chongqing, China"],"affiliations":[{"raw_affiliation_string":"College of Computer Science, Chongqing University, Chongqing, China","institution_ids":["https://openalex.org/I158842170"]}]},{"author_position":"last","author":{"id":null,"display_name":"Jie Shao","orcid":"https://orcid.org/0000-0003-2615-1555"},"institutions":[{"id":"https://openalex.org/I150229711","display_name":"University of Electronic Science and Technology of China","ror":"https://ror.org/04qr3zq92","country_code":"CN","type":"education","lineage":["https://openalex.org/I150229711"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jie Shao","raw_affiliation_strings":["Center for Future Media, School of Computer Science and Engineering, University of Electronic Science and Technology of China, Chengdu, China"],"affiliations":[{"raw_affiliation_string":"Center for Future Media, School of Computer Science and Engineering, University of Electronic Science and Technology of China, Chengdu, China","institution_ids":["https://openalex.org/I150229711"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":[],"corresponding_institution_ids":["https://openalex.org/I150229711"],"apc_list":null,"apc_paid":null,"fwci":0.4199,"has_fulltext":false,"cited_by_count":7,"citation_normalized_percentile":{"value":0.70774966,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":98},"biblio":{"volume":"34","issue":"8","first_page":"4816","last_page":"4825"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.8754000067710876,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.8754000067710876,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12101","display_name":"Advanced Bandit Algorithms Research","score":0.013899999670684338,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T10848","display_name":"Advanced Multi-Objective Optimization Algorithms","score":0.012799999676644802,"subfield":{"id":"https://openalex.org/subfields/1703","display_name":"Computational Theory and Mathematics"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/residual","display_name":"Residual","score":0.6474000215530396},{"id":"https://openalex.org/keywords/pruning","display_name":"Pruning","score":0.633899986743927},{"id":"https://openalex.org/keywords/abstraction","display_name":"Abstraction","score":0.6209999918937683},{"id":"https://openalex.org/keywords/action","display_name":"Action (physics)","score":0.5807999968528748},{"id":"https://openalex.org/keywords/entropy","display_name":"Entropy (arrow of time)","score":0.4975000023841858},{"id":"https://openalex.org/keywords/diversity","display_name":"Diversity (politics)","score":0.4578000009059906}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.682699978351593},{"id":"https://openalex.org/C155512373","wikidata":"https://www.wikidata.org/wiki/Q287450","display_name":"Residual","level":2,"score":0.6474000215530396},{"id":"https://openalex.org/C108010975","wikidata":"https://www.wikidata.org/wiki/Q500094","display_name":"Pruning","level":2,"score":0.633899986743927},{"id":"https://openalex.org/C124304363","wikidata":"https://www.wikidata.org/wiki/Q673661","display_name":"Abstraction","level":2,"score":0.6209999918937683},{"id":"https://openalex.org/C2780791683","wikidata":"https://www.wikidata.org/wiki/Q846785","display_name":"Action (physics)","level":2,"score":0.5807999968528748},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5372999906539917},{"id":"https://openalex.org/C106301342","wikidata":"https://www.wikidata.org/wiki/Q4117933","display_name":"Entropy (arrow of time)","level":2,"score":0.4975000023841858},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.48489999771118164},{"id":"https://openalex.org/C2781316041","wikidata":"https://www.wikidata.org/wiki/Q1230584","display_name":"Diversity (politics)","level":2,"score":0.4578000009059906},{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.33390000462532043},{"id":"https://openalex.org/C2775924081","wikidata":"https://www.wikidata.org/wiki/Q55608371","display_name":"Control (management)","level":2,"score":0.3257000148296356},{"id":"https://openalex.org/C2776505523","wikidata":"https://www.wikidata.org/wiki/Q4785468","display_name":"Plan (archaeology)","level":2,"score":0.30469998717308044},{"id":"https://openalex.org/C2780210234","wikidata":"https://www.wikidata.org/wiki/Q422638","display_name":"Action plan","level":2,"score":0.30059999227523804},{"id":"https://openalex.org/C9679016","wikidata":"https://www.wikidata.org/wiki/Q1417473","display_name":"Principle of maximum entropy","level":2,"score":0.25780001282691956},{"id":"https://openalex.org/C2778334786","wikidata":"https://www.wikidata.org/wiki/Q1586270","display_name":"Variation (astronomy)","level":2,"score":0.25619998574256897},{"id":"https://openalex.org/C112930515","wikidata":"https://www.wikidata.org/wiki/Q4389547","display_name":"Risk analysis (engineering)","level":1,"score":0.25440001487731934}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1109/tnnls.2021.3128666","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tnnls.2021.3128666","pdf_url":null,"source":{"id":"https://openalex.org/S4210175523","display_name":"IEEE Transactions on Neural Networks and Learning Systems","issn_l":"2162-237X","issn":["2162-237X","2162-2388"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Neural Networks and Learning Systems","raw_type":"journal-article"},{"id":"pmid:34851834","is_oa":false,"landing_page_url":"https://pubmed.ncbi.nlm.nih.gov/34851834","pdf_url":null,"source":{"id":"https://openalex.org/S4306525036","display_name":"PubMed","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1299303238","host_organization_name":"National Institutes of Health","host_organization_lineage":["https://openalex.org/I1299303238"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE transactions on neural networks and learning systems","raw_type":null}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G4253300795","display_name":null,"funder_award_id":"61832001","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":44,"referenced_works":["https://openalex.org/W1646707810","https://openalex.org/W2101881799","https://openalex.org/W2109910161","https://openalex.org/W2132714442","https://openalex.org/W2145339207","https://openalex.org/W2158782408","https://openalex.org/W2257979135","https://openalex.org/W2739678353","https://openalex.org/W2791797404","https://openalex.org/W2889939141","https://openalex.org/W2946694254","https://openalex.org/W2963430540","https://openalex.org/W2963761387","https://openalex.org/W2964227312","https://openalex.org/W2965889088","https://openalex.org/W3033043020","https://openalex.org/W6676557315","https://openalex.org/W6679518283","https://openalex.org/W6682849425","https://openalex.org/W6682859298","https://openalex.org/W6684818487","https://openalex.org/W6684921986","https://openalex.org/W6695011786","https://openalex.org/W6713347985","https://openalex.org/W6735209194","https://openalex.org/W6736368053","https://openalex.org/W6741002519","https://openalex.org/W6746404916","https://openalex.org/W6747231145","https://openalex.org/W6747473740","https://openalex.org/W6748566876","https://openalex.org/W6748603076","https://openalex.org/W6748839928","https://openalex.org/W6749821205","https://openalex.org/W6755176883","https://openalex.org/W6757592117","https://openalex.org/W6757947852","https://openalex.org/W6760495073","https://openalex.org/W6762399032","https://openalex.org/W6762722231","https://openalex.org/W6767130282","https://openalex.org/W6772135379","https://openalex.org/W6779728822","https://openalex.org/W6780559895"],"related_works":[],"abstract_inverted_index":{"Extracting":[0],"temporal":[1,44],"abstraction":[2,45],"(option),":[3],"which":[4,153],"empowers":[5],"the":[6,29,71,89,126],"action":[7,20,37],"space,":[8,21],"is":[9,46,135],"a":[10,18,42,47,77,131,141],"crucial":[11],"challenge":[12,49,62],"in":[13,28,160],"hierarchical":[14],"reinforcement":[15],"learning.":[16],"Under":[17],"well-structured":[19],"decision-making":[22],"agents":[23],"can":[24,108],"probe":[25],"more":[26],"deeply":[27],"searching":[30],"or":[31],"plan":[32],"efficiently":[33],"through":[34],"pruning":[35],"irrelevant":[36],"candidates.":[38],"However,":[39],"automatically":[40],"capturing":[41],"well-performed":[43],"nontrivial":[48],"due":[50],"to":[51,85,95,137],"its":[52],"insufficient":[53],"exploration":[54],"and":[55,68,111],"inadequate":[56],"functionality.":[57],"We":[58,113,146],"consider":[59],"alleviating":[60],"this":[61],"from":[63],"two":[64],"perspectives,":[65],"i.e.,":[66],"diversity":[67],"individuality.":[69],"For":[70,88],"aspect":[72,90],"of":[73,91,144],"diversity,":[74],"we":[75,93],"propose":[76,94],"maximum":[78],"entropy":[79],"model":[80],"based":[81],"on":[82],"ensembled":[83],"options":[84],"encourage":[86],"exploration.":[87],"individuality,":[92],"distinguish":[96],"each":[97,106],"option":[98,107,122],"accurately,":[99],"utilizing":[100],"mutual":[101],"formation":[102],"minimization,":[103],"so":[104],"that":[105,155],"better":[109],"express":[110],"function.":[112],"name":[114],"our":[115,156],"framework":[116],"as":[117],"an":[118],"ensemble":[119],"with":[120,130],"soft":[121],"(ESO)":[123],"critics.":[124],"Furthermore,":[125],"residual":[127,142],"algorithm":[128],"(RA)":[129],"bidirectional":[132],"target":[133],"network":[134],"introduced":[136],"stabilize":[138],"bootstrapping,":[139],"yielding":[140],"version":[143],"ESO.":[145],"provide":[147],"detailed":[148],"analysis":[149],"for":[150],"extensive":[151],"experiments,":[152],"shows":[154],"method":[157],"boosts":[158],"performance":[159],"commonly":[161],"used":[162],"continuous":[163],"control":[164],"tasks.":[165]},"counts_by_year":[{"year":2025,"cited_by_count":4},{"year":2023,"cited_by_count":2},{"year":2022,"cited_by_count":1}],"updated_date":"2026-04-09T08:11:56.329763","created_date":"2021-12-06T00:00:00"}
