{"id":"https://openalex.org/W4417230782","doi":"https://doi.org/10.1109/tcyb.2025.3637764","title":"Enhancing Exploration in Actor-Critic Algorithms: An Approach to Incentivize Plausible Novel States","display_name":"Enhancing Exploration in Actor-Critic Algorithms: An Approach to Incentivize Plausible Novel States","publication_year":2025,"publication_date":"2025-12-09","ids":{"openalex":"https://openalex.org/W4417230782","doi":"https://doi.org/10.1109/tcyb.2025.3637764","pmid":"https://pubmed.ncbi.nlm.nih.gov/41364562"},"language":"en","primary_location":{"id":"doi:10.1109/tcyb.2025.3637764","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tcyb.2025.3637764","pdf_url":null,"source":{"id":"https://openalex.org/S4210191041","display_name":"IEEE Transactions on Cybernetics","issn_l":"2168-2267","issn":["2168-2267","2168-2275"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Cybernetics","raw_type":"journal-article"},"type":"article","indexed_in":["crossref","pubmed"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5043976016","display_name":"Chayan Banerjee","orcid":"https://orcid.org/0000-0003-1039-3744"},"institutions":[{"id":"https://openalex.org/I160993911","display_name":"Queensland University of Technology","ror":"https://ror.org/03pnv4752","country_code":"AU","type":"education","lineage":["https://openalex.org/I160993911"]}],"countries":["AU"],"is_corresponding":false,"raw_author_name":"Chayan Banerjee","raw_affiliation_strings":["School of Electrical Engineering and Robotics, Queensland University of Technology, Brisbane, QLD, Australia"],"raw_orcid":"https://orcid.org/0000-0003-1039-3744","affiliations":[{"raw_affiliation_string":"School of Electrical Engineering and Robotics, Queensland University of Technology, Brisbane, QLD, Australia","institution_ids":["https://openalex.org/I160993911"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100370053","display_name":"Zhiyong Chen","orcid":"https://orcid.org/0000-0002-2033-4249"},"institutions":[{"id":"https://openalex.org/I78757542","display_name":"University of Newcastle Australia","ror":"https://ror.org/00eae9z71","country_code":"AU","type":"education","lineage":["https://openalex.org/I78757542"]}],"countries":["AU"],"is_corresponding":false,"raw_author_name":"Zhiyong Chen","raw_affiliation_strings":["School of Engineering, The University of Newcastle, Callaghan, NSW, Australia"],"raw_orcid":"https://orcid.org/0000-0002-2033-4249","affiliations":[{"raw_affiliation_string":"School of Engineering, The University of Newcastle, Callaghan, NSW, Australia","institution_ids":["https://openalex.org/I78757542"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5008309887","display_name":"Nasimul Noman","orcid":"https://orcid.org/0000-0002-8566-0870"},"institutions":[{"id":"https://openalex.org/I78757542","display_name":"University of Newcastle Australia","ror":"https://ror.org/00eae9z71","country_code":"AU","type":"education","lineage":["https://openalex.org/I78757542"]}],"countries":["AU"],"is_corresponding":false,"raw_author_name":"Nasimul Noman","raw_affiliation_strings":["School of Information and Physical Sciences, The University of Newcastle, Callaghan, Australia"],"raw_orcid":"https://orcid.org/0000-0002-8566-0870","affiliations":[{"raw_affiliation_string":"School of Information and Physical Sciences, The University of Newcastle, Callaghan, Australia","institution_ids":["https://openalex.org/I78757542"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.18793363,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":"56","issue":"4","first_page":"2271","last_page":"2282"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.8992999792098999,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.8992999792098999,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12611","display_name":"Neural Networks and Reservoir Computing","score":0.01600000075995922,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11689","display_name":"Adversarial Robustness in Machine Learning","score":0.013100000098347664,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/novelty","display_name":"Novelty","score":0.705299973487854},{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.6897000074386597},{"id":"https://openalex.org/keywords/benchmark","display_name":"Benchmark (surveying)","score":0.6514999866485596},{"id":"https://openalex.org/keywords/key","display_name":"Key (lock)","score":0.5932000279426575},{"id":"https://openalex.org/keywords/sample","display_name":"Sample (material)","score":0.4812999963760376},{"id":"https://openalex.org/keywords/state","display_name":"State (computer science)","score":0.36500000953674316},{"id":"https://openalex.org/keywords/baseline","display_name":"Baseline (sea)","score":0.34450000524520874}],"concepts":[{"id":"https://openalex.org/C2778738651","wikidata":"https://www.wikidata.org/wiki/Q16546687","display_name":"Novelty","level":2,"score":0.705299973487854},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6980000138282776},{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.6897000074386597},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.6514999866485596},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.5932000279426575},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5680999755859375},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.5673999786376953},{"id":"https://openalex.org/C198531522","wikidata":"https://www.wikidata.org/wiki/Q485146","display_name":"Sample (material)","level":2,"score":0.4812999963760376},{"id":"https://openalex.org/C48103436","wikidata":"https://www.wikidata.org/wiki/Q599031","display_name":"State (computer science)","level":2,"score":0.36500000953674316},{"id":"https://openalex.org/C12725497","wikidata":"https://www.wikidata.org/wiki/Q810247","display_name":"Baseline (sea)","level":2,"score":0.34450000524520874},{"id":"https://openalex.org/C2778445095","wikidata":"https://www.wikidata.org/wiki/Q18354077","display_name":"Sample complexity","level":2,"score":0.31060001254081726},{"id":"https://openalex.org/C61797465","wikidata":"https://www.wikidata.org/wiki/Q1188986","display_name":"Term (time)","level":2,"score":0.305400013923645},{"id":"https://openalex.org/C111335779","wikidata":"https://www.wikidata.org/wiki/Q3454686","display_name":"Reduction (mathematics)","level":2,"score":0.30379998683929443},{"id":"https://openalex.org/C120936955","wikidata":"https://www.wikidata.org/wiki/Q2155640","display_name":"Empirical research","level":2,"score":0.29750001430511475},{"id":"https://openalex.org/C166052673","wikidata":"https://www.wikidata.org/wiki/Q83021","display_name":"Empirical evidence","level":2,"score":0.27469998598098755},{"id":"https://openalex.org/C86251818","wikidata":"https://www.wikidata.org/wiki/Q816754","display_name":"Benchmarking","level":2,"score":0.2621000111103058},{"id":"https://openalex.org/C140779682","wikidata":"https://www.wikidata.org/wiki/Q210868","display_name":"Sampling (signal processing)","level":3,"score":0.26109999418258667},{"id":"https://openalex.org/C136197465","wikidata":"https://www.wikidata.org/wiki/Q1729295","display_name":"Variety (cybernetics)","level":2,"score":0.251800000667572}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1109/tcyb.2025.3637764","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tcyb.2025.3637764","pdf_url":null,"source":{"id":"https://openalex.org/S4210191041","display_name":"IEEE Transactions on Cybernetics","issn_l":"2168-2267","issn":["2168-2267","2168-2275"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Cybernetics","raw_type":"journal-article"},{"id":"pmid:41364562","is_oa":false,"landing_page_url":"https://pubmed.ncbi.nlm.nih.gov/41364562","pdf_url":null,"source":{"id":"https://openalex.org/S4306525036","display_name":"PubMed","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1299303238","host_organization_name":"National Institutes of Health","host_organization_lineage":["https://openalex.org/I1299303238"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE transactions on cybernetics","raw_type":null}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":21,"referenced_works":["https://openalex.org/W2014268383","https://openalex.org/W2145339207","https://openalex.org/W2268194897","https://openalex.org/W2556820590","https://openalex.org/W2904246096","https://openalex.org/W2912061736","https://openalex.org/W2986925736","https://openalex.org/W3008976001","https://openalex.org/W3128703278","https://openalex.org/W3175662487","https://openalex.org/W3195143297","https://openalex.org/W3212580602","https://openalex.org/W4214918501","https://openalex.org/W4225832811","https://openalex.org/W4281258657","https://openalex.org/W4310873825","https://openalex.org/W4376274293","https://openalex.org/W4383109096","https://openalex.org/W4415795631","https://openalex.org/W4415798146","https://openalex.org/W7103759348"],"related_works":[],"abstract_inverted_index":{"Actor-critic":[0],"(AC)":[1],"algorithms":[2,130],"are":[3],"model-free":[4],"deep":[5],"reinforcement":[6],"learning":[7,40],"techniques":[8],"that":[9,106],"have":[10],"consistently":[11],"demonstrated":[12],"effectiveness":[13],"across":[14,157],"various":[15],"domains.":[16],"Enhancing":[17],"exploration":[18,52,89,124],"(action":[19],"entropy)":[20],"and":[21,100,138,150],"exploitation":[22],"(expected":[23],"return)":[24],"through":[25],"more":[26],"efficient":[27],"sample":[28,136],"utilization":[29],"is":[30,42],"pivotal":[31],"to":[32,43,76,87],"their":[33],"success.":[34],"A":[35],"key":[36],"strategy":[37],"for":[38,70],"a":[39,66,97],"algorithm":[41,162],"intelligently":[44],"navigate":[45],"the":[46,51,101,123],"environment's":[47],"state":[48],"space,":[49],"prioritizing":[50],"of":[53,104,125,159],"rarely":[54,64],"visited":[55],"states":[56],"over":[57],"frequently":[58],"encountered":[59],"ones.":[60],"However,":[61],"conventional":[62],"approaches":[63],"quantify":[65],"novel":[67,127],"state's":[68,98],"utility":[69],"policy":[71],"learning,":[72],"which":[73,108],"can":[74,131],"lead":[75],"inefficient":[77],"exploration.":[78],"To":[79],"address":[80],"this,":[81],"we":[82,109],"propose":[83],"an":[84,92],"innovative":[85],"approach":[86],"bolster":[88],"by":[90],"employing":[91],"intrinsic":[93],"reward":[94],"based":[95],"on":[96],"novelty":[99],"potential":[102],"benefits":[103],"exploring":[105],"state,":[107],"term":[110],"plausible":[111],"novelty.":[112],"Our":[113],"method":[114],"seamlessly":[115],"integrates":[116],"with":[117],"off-policy":[118],"AC":[119,129],"algorithms.":[120],"By":[121],"incentivizing":[122],"plausibly":[126],"states,":[128],"achieve":[132],"substantial":[133],"improvements":[134],"in":[135,147,153,164],"efficiency":[137],"overall":[139],"training":[140,148],"performance.":[141],"Empirical":[142],"results":[143],"demonstrate":[144],"19%":[145],"improvement":[146],"return":[149],"30%":[151],"reduction":[152],"standard":[154],"deviation,":[155],"averaged":[156],"comparisons":[158],"three":[160],"benchmark":[161],"pairs":[163],"five":[165],"different":[166],"environments.":[167]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2025-12-10T00:00:00"}
