{"id":"https://openalex.org/W4375869032","doi":"https://doi.org/10.1109/icassp49357.2023.10095379","title":"Complementary Learning System Based Intrinsic Reward in Reinforcement Learning","display_name":"Complementary Learning System Based Intrinsic Reward in Reinforcement Learning","publication_year":2023,"publication_date":"2023-05-05","ids":{"openalex":"https://openalex.org/W4375869032","doi":"https://doi.org/10.1109/icassp49357.2023.10095379"},"language":"en","primary_location":{"id":"doi:10.1109/icassp49357.2023.10095379","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp49357.2023.10095379","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2023 - 2023 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5016262505","display_name":"Zijian Gao","orcid":"https://orcid.org/0000-0001-5151-3381"},"institutions":[{"id":"https://openalex.org/I198357462","display_name":"Changsha University","ror":"https://ror.org/011d8sm39","country_code":"CN","type":"education","lineage":["https://openalex.org/I198357462"]},{"id":"https://openalex.org/I170215575","display_name":"National University of Defense Technology","ror":"https://ror.org/05d2yfz11","country_code":"CN","type":"education","lineage":["https://openalex.org/I170215575"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Zijian Gao","raw_affiliation_strings":["National University of Defense Technology,Changsha,China","Key Laboratory of Software Engineering for Complex Systems, Changsha, China","National University of Defense Technology, Changsha, China"],"affiliations":[{"raw_affiliation_string":"National University of Defense Technology,Changsha,China","institution_ids":["https://openalex.org/I170215575"]},{"raw_affiliation_string":"Key Laboratory of Software Engineering for Complex Systems, Changsha, China","institution_ids":["https://openalex.org/I198357462"]},{"raw_affiliation_string":"National University of Defense Technology, Changsha, China","institution_ids":["https://openalex.org/I170215575"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5013340793","display_name":"Kele Xu","orcid":"https://orcid.org/0000-0001-5997-5169"},"institutions":[{"id":"https://openalex.org/I198357462","display_name":"Changsha University","ror":"https://ror.org/011d8sm39","country_code":"CN","type":"education","lineage":["https://openalex.org/I198357462"]},{"id":"https://openalex.org/I170215575","display_name":"National University of Defense Technology","ror":"https://ror.org/05d2yfz11","country_code":"CN","type":"education","lineage":["https://openalex.org/I170215575"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Kele Xu","raw_affiliation_strings":["National University of Defense Technology,Changsha,China","National University of Defense Technology, Changsha, China","Key Laboratory of Software Engineering for Complex Systems, Changsha, China"],"affiliations":[{"raw_affiliation_string":"National University of Defense Technology,Changsha,China","institution_ids":["https://openalex.org/I170215575"]},{"raw_affiliation_string":"National University of Defense Technology, Changsha, China","institution_ids":["https://openalex.org/I170215575"]},{"raw_affiliation_string":"Key Laboratory of Software Engineering for Complex Systems, Changsha, China","institution_ids":["https://openalex.org/I198357462"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5091542467","display_name":"Hongda Jia","orcid":"https://orcid.org/0000-0003-4112-7879"},"institutions":[{"id":"https://openalex.org/I198357462","display_name":"Changsha University","ror":"https://ror.org/011d8sm39","country_code":"CN","type":"education","lineage":["https://openalex.org/I198357462"]},{"id":"https://openalex.org/I170215575","display_name":"National University of Defense Technology","ror":"https://ror.org/05d2yfz11","country_code":"CN","type":"education","lineage":["https://openalex.org/I170215575"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Hongda Jia","raw_affiliation_strings":["National University of Defense Technology,Changsha,China","National University of Defense Technology, Changsha, China","Key Laboratory of Software Engineering for Complex Systems, Changsha, China"],"affiliations":[{"raw_affiliation_string":"National University of Defense Technology,Changsha,China","institution_ids":["https://openalex.org/I170215575"]},{"raw_affiliation_string":"National University of Defense Technology, Changsha, China","institution_ids":["https://openalex.org/I170215575"]},{"raw_affiliation_string":"Key Laboratory of Software Engineering for Complex Systems, Changsha, China","institution_ids":["https://openalex.org/I198357462"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5061252955","display_name":"Tianjiao Wan","orcid":"https://orcid.org/0000-0002-2423-4982"},"institutions":[{"id":"https://openalex.org/I198357462","display_name":"Changsha University","ror":"https://ror.org/011d8sm39","country_code":"CN","type":"education","lineage":["https://openalex.org/I198357462"]},{"id":"https://openalex.org/I170215575","display_name":"National University of Defense Technology","ror":"https://ror.org/05d2yfz11","country_code":"CN","type":"education","lineage":["https://openalex.org/I170215575"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Tianjiao Wan","raw_affiliation_strings":["National University of Defense Technology,Changsha,China","National University of Defense Technology, Changsha, China","Key Laboratory of Software Engineering for Complex Systems, Changsha, China"],"affiliations":[{"raw_affiliation_string":"National University of Defense Technology,Changsha,China","institution_ids":["https://openalex.org/I170215575"]},{"raw_affiliation_string":"National University of Defense Technology, Changsha, China","institution_ids":["https://openalex.org/I170215575"]},{"raw_affiliation_string":"Key Laboratory of Software Engineering for Complex Systems, Changsha, China","institution_ids":["https://openalex.org/I198357462"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5088885490","display_name":"Bo Ding","orcid":"https://orcid.org/0000-0002-1236-8318"},"institutions":[{"id":"https://openalex.org/I170215575","display_name":"National University of Defense Technology","ror":"https://ror.org/05d2yfz11","country_code":"CN","type":"education","lineage":["https://openalex.org/I170215575"]},{"id":"https://openalex.org/I198357462","display_name":"Changsha University","ror":"https://ror.org/011d8sm39","country_code":"CN","type":"education","lineage":["https://openalex.org/I198357462"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Bo Ding","raw_affiliation_strings":["National University of Defense Technology,Changsha,China","Key Laboratory of Software Engineering for Complex Systems, Changsha, China","National University of Defense Technology, Changsha, China"],"affiliations":[{"raw_affiliation_string":"National University of Defense Technology,Changsha,China","institution_ids":["https://openalex.org/I170215575"]},{"raw_affiliation_string":"Key Laboratory of Software Engineering for Complex Systems, Changsha, China","institution_ids":["https://openalex.org/I198357462"]},{"raw_affiliation_string":"National University of Defense Technology, Changsha, China","institution_ids":["https://openalex.org/I170215575"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5039795290","display_name":"Dawei Feng","orcid":"https://orcid.org/0000-0002-7587-8905"},"institutions":[{"id":"https://openalex.org/I170215575","display_name":"National University of Defense Technology","ror":"https://ror.org/05d2yfz11","country_code":"CN","type":"education","lineage":["https://openalex.org/I170215575"]},{"id":"https://openalex.org/I198357462","display_name":"Changsha University","ror":"https://ror.org/011d8sm39","country_code":"CN","type":"education","lineage":["https://openalex.org/I198357462"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Dawei Feng","raw_affiliation_strings":["National University of Defense Technology,Changsha,China","National University of Defense Technology, Changsha, China","Key Laboratory of Software Engineering for Complex Systems, Changsha, China"],"affiliations":[{"raw_affiliation_string":"National University of Defense Technology,Changsha,China","institution_ids":["https://openalex.org/I170215575"]},{"raw_affiliation_string":"National University of Defense Technology, Changsha, China","institution_ids":["https://openalex.org/I170215575"]},{"raw_affiliation_string":"Key Laboratory of Software Engineering for Complex Systems, Changsha, China","institution_ids":["https://openalex.org/I198357462"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5083124500","display_name":"Xinjun Mao","orcid":"https://orcid.org/0000-0001-6003-5748"},"institutions":[{"id":"https://openalex.org/I170215575","display_name":"National University of Defense Technology","ror":"https://ror.org/05d2yfz11","country_code":"CN","type":"education","lineage":["https://openalex.org/I170215575"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xinjun Mao","raw_affiliation_strings":["National University of Defense Technology,Changsha,China","National University of Defense Technology, Changsha, China"],"affiliations":[{"raw_affiliation_string":"National University of Defense Technology,Changsha,China","institution_ids":["https://openalex.org/I170215575"]},{"raw_affiliation_string":"National University of Defense Technology, Changsha, China","institution_ids":["https://openalex.org/I170215575"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5101522100","display_name":"Huaimin Wang","orcid":"https://orcid.org/0000-0002-3245-1901"},"institutions":[{"id":"https://openalex.org/I170215575","display_name":"National University of Defense Technology","ror":"https://ror.org/05d2yfz11","country_code":"CN","type":"education","lineage":["https://openalex.org/I170215575"]},{"id":"https://openalex.org/I198357462","display_name":"Changsha University","ror":"https://ror.org/011d8sm39","country_code":"CN","type":"education","lineage":["https://openalex.org/I198357462"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Huaimin Wang","raw_affiliation_strings":["National University of Defense Technology,Changsha,China","Key Laboratory of Software Engineering for Complex Systems, Changsha, China","National University of Defense Technology, Changsha, China"],"affiliations":[{"raw_affiliation_string":"National University of Defense Technology,Changsha,China","institution_ids":["https://openalex.org/I170215575"]},{"raw_affiliation_string":"Key Laboratory of Software Engineering for Complex Systems, Changsha, China","institution_ids":["https://openalex.org/I198357462"]},{"raw_affiliation_string":"National University of Defense Technology, Changsha, China","institution_ids":["https://openalex.org/I170215575"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":8,"corresponding_author_ids":["https://openalex.org/A5016262505"],"corresponding_institution_ids":["https://openalex.org/I170215575","https://openalex.org/I198357462"],"apc_list":null,"apc_paid":null,"fwci":0.7236,"has_fulltext":false,"cited_by_count":4,"citation_normalized_percentile":{"value":0.6740027,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":97},"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"5"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10581","display_name":"Neural dynamics and brain function","score":0.9979000091552734,"subfield":{"id":"https://openalex.org/subfields/2805","display_name":"Cognitive Neuroscience"},"field":{"id":"https://openalex.org/fields/28","display_name":"Neuroscience"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},"topics":[{"id":"https://openalex.org/T10581","display_name":"Neural dynamics and brain function","score":0.9979000091552734,"subfield":{"id":"https://openalex.org/subfields/2805","display_name":"Cognitive Neuroscience"},"field":{"id":"https://openalex.org/fields/28","display_name":"Neuroscience"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9965999722480774,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12611","display_name":"Neural Networks and Reservoir Computing","score":0.9487000107765198,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.8759585618972778},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7615199089050293},{"id":"https://openalex.org/keywords/curiosity","display_name":"Curiosity","score":0.7367488145828247},{"id":"https://openalex.org/keywords/cls-upper-limits","display_name":"CLs upper limits","score":0.6616957187652588},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.6153851747512817},{"id":"https://openalex.org/keywords/suite","display_name":"Suite","score":0.5971362590789795},{"id":"https://openalex.org/keywords/term","display_name":"Term (time)","score":0.5909119844436646},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.5078385472297668},{"id":"https://openalex.org/keywords/psychology","display_name":"Psychology","score":0.09441825747489929},{"id":"https://openalex.org/keywords/neuroscience","display_name":"Neuroscience","score":0.07365480065345764}],"concepts":[{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.8759585618972778},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7615199089050293},{"id":"https://openalex.org/C33435437","wikidata":"https://www.wikidata.org/wiki/Q366791","display_name":"Curiosity","level":2,"score":0.7367488145828247},{"id":"https://openalex.org/C190729725","wikidata":"https://www.wikidata.org/wiki/Q5012817","display_name":"CLs upper limits","level":2,"score":0.6616957187652588},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6153851747512817},{"id":"https://openalex.org/C79581498","wikidata":"https://www.wikidata.org/wiki/Q1367530","display_name":"Suite","level":2,"score":0.5971362590789795},{"id":"https://openalex.org/C61797465","wikidata":"https://www.wikidata.org/wiki/Q1188986","display_name":"Term (time)","level":2,"score":0.5909119844436646},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.5078385472297668},{"id":"https://openalex.org/C15744967","wikidata":"https://www.wikidata.org/wiki/Q9418","display_name":"Psychology","level":0,"score":0.09441825747489929},{"id":"https://openalex.org/C169760540","wikidata":"https://www.wikidata.org/wiki/Q207011","display_name":"Neuroscience","level":1,"score":0.07365480065345764},{"id":"https://openalex.org/C71924100","wikidata":"https://www.wikidata.org/wiki/Q11190","display_name":"Medicine","level":0,"score":0.0},{"id":"https://openalex.org/C119767625","wikidata":"https://www.wikidata.org/wiki/Q618211","display_name":"Optometry","level":1,"score":0.0},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C95457728","wikidata":"https://www.wikidata.org/wiki/Q309","display_name":"History","level":0,"score":0.0},{"id":"https://openalex.org/C166957645","wikidata":"https://www.wikidata.org/wiki/Q23498","display_name":"Archaeology","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/icassp49357.2023.10095379","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp49357.2023.10095379","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2023 - 2023 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":34,"referenced_works":["https://openalex.org/W2000474946","https://openalex.org/W2036107051","https://openalex.org/W2103104224","https://openalex.org/W2131600418","https://openalex.org/W2150468603","https://openalex.org/W2150595230","https://openalex.org/W2424347275","https://openalex.org/W2788388592","https://openalex.org/W2953070460","https://openalex.org/W2953326529","https://openalex.org/W2953772919","https://openalex.org/W2963438456","https://openalex.org/W2963523627","https://openalex.org/W2964067469","https://openalex.org/W3034368386","https://openalex.org/W3036619998","https://openalex.org/W3132674603","https://openalex.org/W3138154797","https://openalex.org/W3167112175","https://openalex.org/W3214229832","https://openalex.org/W4221153115","https://openalex.org/W4286891004","https://openalex.org/W4287867830","https://openalex.org/W6733814495","https://openalex.org/W6748603076","https://openalex.org/W6756303580","https://openalex.org/W6762863188","https://openalex.org/W6765240361","https://openalex.org/W6771807793","https://openalex.org/W6791000347","https://openalex.org/W6791194670","https://openalex.org/W6796667233","https://openalex.org/W6803067813","https://openalex.org/W6809871153"],"related_works":["https://openalex.org/W4285676344","https://openalex.org/W3094054656","https://openalex.org/W2123270665","https://openalex.org/W4382584175","https://openalex.org/W2060310955","https://openalex.org/W2284924956","https://openalex.org/W3043413210","https://openalex.org/W2613740288","https://openalex.org/W4383268304","https://openalex.org/W3163092301"],"abstract_inverted_index":{"Deep":[0],"reinforcement":[1],"learning":[2,29,36,41],"has":[3],"achieved":[4],"encouraging":[5],"performance":[6,142],"in":[7],"many":[8],"realms.":[9],"However,":[10],"one":[11],"of":[12,19,127],"its":[13],"primary":[14],"challenges":[15],"is":[16,23],"the":[17,50,75,100,104,108,125,134],"sparsity":[18],"extrinsic":[20,80],"rewards,":[21],"which":[22,71,111],"still":[24],"far":[25],"from":[26],"solved.":[27],"Complementary":[28],"system":[30],"theory":[31],"suggests":[32],"that":[33,52,137],"effective":[34],"human":[35],"relies":[37],"on":[38,143],"two":[39,105],"complementary":[40],"systems":[42],"utilizing":[43],"short-term":[44,90],"and":[45,91,146],"long-term":[46,92],"memories.":[47],"Inspired":[48],"by":[49,56,78],"fact":[51],"humans":[53],"evaluate":[54],"curiosity":[55],"comparing":[57],"current":[58],"observations":[59],"with":[60,89],"historical":[61],"information,":[62],"we":[63,83,129],"propose":[64],"a":[65,85],"novel":[66],"intrinsic":[67,109],"reward,":[68,110],"namely":[69],"CLS-IR,":[70,128],"aims":[72],"to":[73,120],"address":[74],"problems":[76],"caused":[77],"sparse":[79],"rewards.":[81],"Specifically,":[82],"train":[84],"self-supervised":[86],"predictive":[87],"model":[88],"memories":[93,106],"via":[94],"exponential":[95],"moving":[96],"averages.":[97],"We":[98],"employ":[99],"information":[101],"gain":[102],"between":[103],"as":[107],"does":[112],"not":[113],"incur":[114],"additional":[115],"training":[116],"costs":[117],"but":[118],"leads":[119],"better":[121],"exploration.":[122],"To":[123],"investigate":[124],"effectiveness":[126],"conduct":[130],"extensive":[131],"experimental":[132],"evaluations;":[133],"results":[135],"demonstrate":[136],"CLS-IR":[138],"can":[139],"achieve":[140],"state-of-the-art":[141],"Atari":[144],"games":[145],"DeepMind":[147],"Control":[148],"Suite.":[149]},"counts_by_year":[{"year":2025,"cited_by_count":3},{"year":2024,"cited_by_count":1}],"updated_date":"2026-02-25T23:00:34.991745","created_date":"2025-10-10T00:00:00"}
