{"id":"https://openalex.org/W3194626037","doi":"https://doi.org/10.1109/tetci.2022.3140375","title":"Optimal Actor-Critic Policy With Optimized Training Datasets","display_name":"Optimal Actor-Critic Policy With Optimized Training Datasets","publication_year":2022,"publication_date":"2022-01-20","ids":{"openalex":"https://openalex.org/W3194626037","doi":"https://doi.org/10.1109/tetci.2022.3140375","mag":"3194626037"},"language":"en","primary_location":{"id":"doi:10.1109/tetci.2022.3140375","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tetci.2022.3140375","pdf_url":null,"source":{"id":"https://openalex.org/S4210210251","display_name":"IEEE Transactions on Emerging Topics in Computational Intelligence","issn_l":"2471-285X","issn":["2471-285X"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Emerging Topics in Computational Intelligence","raw_type":"journal-article"},"type":"preprint","indexed_in":["arxiv","crossref","datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/2108.06911","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5043976016","display_name":"Chayan Banerjee","orcid":"https://orcid.org/0000-0003-1039-3744"},"institutions":[{"id":"https://openalex.org/I78757542","display_name":"University of Newcastle Australia","ror":"https://ror.org/00eae9z71","country_code":"AU","type":"education","lineage":["https://openalex.org/I78757542"]}],"countries":["AU"],"is_corresponding":true,"raw_author_name":"Chayan Banerjee","raw_affiliation_strings":["School of Engineering, University of Newcastle, Callaghan, NSW, Australia"],"affiliations":[{"raw_affiliation_string":"School of Engineering, University of Newcastle, Callaghan, NSW, Australia","institution_ids":["https://openalex.org/I78757542"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5011175076","display_name":"Zhiyong Chen","orcid":"https://orcid.org/0000-0002-6928-4449"},"institutions":[{"id":"https://openalex.org/I78757542","display_name":"University of Newcastle Australia","ror":"https://ror.org/00eae9z71","country_code":"AU","type":"education","lineage":["https://openalex.org/I78757542"]}],"countries":["AU"],"is_corresponding":false,"raw_author_name":"Zhiyong Chen","raw_affiliation_strings":["School of Engineering, University of Newcastle, Callaghan, NSW, Australia","University of Newcastle, Australia"],"affiliations":[{"raw_affiliation_string":"School of Engineering, University of Newcastle, Callaghan, NSW, Australia","institution_ids":["https://openalex.org/I78757542"]},{"raw_affiliation_string":"University of Newcastle, Australia","institution_ids":["https://openalex.org/I78757542"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5008309887","display_name":"Nasimul Noman","orcid":"https://orcid.org/0000-0002-8566-0870"},"institutions":[{"id":"https://openalex.org/I78757542","display_name":"University of Newcastle Australia","ror":"https://ror.org/00eae9z71","country_code":"AU","type":"education","lineage":["https://openalex.org/I78757542"]}],"countries":["AU"],"is_corresponding":false,"raw_author_name":"Nasimul Noman","raw_affiliation_strings":["School of Information and Physical Sciences, University of Newcastle, Callaghan, NSW, Australia"],"affiliations":[{"raw_affiliation_string":"School of Information and Physical Sciences, University of Newcastle, Callaghan, NSW, Australia","institution_ids":["https://openalex.org/I78757542"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5101942306","display_name":"Mohsen Zamani","orcid":"https://orcid.org/0000-0003-2972-7417"},"institutions":[{"id":"https://openalex.org/I78757542","display_name":"University of Newcastle Australia","ror":"https://ror.org/00eae9z71","country_code":"AU","type":"education","lineage":["https://openalex.org/I78757542"]}],"countries":["AU"],"is_corresponding":false,"raw_author_name":"Mohsen Zamani","raw_affiliation_strings":["School of Engineering, University of Newcastle, Callaghan, NSW, Australia"],"affiliations":[{"raw_affiliation_string":"School of Engineering, University of Newcastle, Callaghan, NSW, Australia","institution_ids":["https://openalex.org/I78757542"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5043976016"],"corresponding_institution_ids":["https://openalex.org/I78757542"],"apc_list":null,"apc_paid":null,"fwci":0.138,"has_fulltext":true,"cited_by_count":1,"citation_normalized_percentile":{"value":0.5049239,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":94},"biblio":{"volume":"6","issue":"6","first_page":"1324","last_page":"1334"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9994000196456909,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9994000196456909,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11689","display_name":"Adversarial Robustness in Machine Learning","score":0.9456999897956848,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11975","display_name":"Evolutionary Algorithms and Applications","score":0.9261999726295471,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.8093485236167908},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7379613518714905},{"id":"https://openalex.org/keywords/convergence","display_name":"Convergence (economics)","score":0.6793222427368164},{"id":"https://openalex.org/keywords/sampling","display_name":"Sampling (signal processing)","score":0.6746701598167419},{"id":"https://openalex.org/keywords/process","display_name":"Process (computing)","score":0.6599992513656616},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.5063201189041138},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.4936772286891937},{"id":"https://openalex.org/keywords/local-optimum","display_name":"Local optimum","score":0.4788548946380615},{"id":"https://openalex.org/keywords/genetic-algorithm","display_name":"Genetic algorithm","score":0.47320693731307983},{"id":"https://openalex.org/keywords/training","display_name":"Training (meteorology)","score":0.44182682037353516},{"id":"https://openalex.org/keywords/mathematical-optimization","display_name":"Mathematical optimization","score":0.4173162579536438},{"id":"https://openalex.org/keywords/iterative-and-incremental-development","display_name":"Iterative and incremental development","score":0.4138803482055664},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.11312606930732727}],"concepts":[{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.8093485236167908},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7379613518714905},{"id":"https://openalex.org/C2777303404","wikidata":"https://www.wikidata.org/wiki/Q759757","display_name":"Convergence (economics)","level":2,"score":0.6793222427368164},{"id":"https://openalex.org/C140779682","wikidata":"https://www.wikidata.org/wiki/Q210868","display_name":"Sampling (signal processing)","level":3,"score":0.6746701598167419},{"id":"https://openalex.org/C98045186","wikidata":"https://www.wikidata.org/wiki/Q205663","display_name":"Process (computing)","level":2,"score":0.6599992513656616},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.5063201189041138},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4936772286891937},{"id":"https://openalex.org/C141934464","wikidata":"https://www.wikidata.org/wiki/Q3305386","display_name":"Local optimum","level":2,"score":0.4788548946380615},{"id":"https://openalex.org/C8880873","wikidata":"https://www.wikidata.org/wiki/Q187787","display_name":"Genetic algorithm","level":2,"score":0.47320693731307983},{"id":"https://openalex.org/C2777211547","wikidata":"https://www.wikidata.org/wiki/Q17141490","display_name":"Training (meteorology)","level":2,"score":0.44182682037353516},{"id":"https://openalex.org/C126255220","wikidata":"https://www.wikidata.org/wiki/Q141495","display_name":"Mathematical optimization","level":1,"score":0.4173162579536438},{"id":"https://openalex.org/C143587482","wikidata":"https://www.wikidata.org/wiki/Q1543216","display_name":"Iterative and incremental development","level":2,"score":0.4138803482055664},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.11312606930732727},{"id":"https://openalex.org/C115903868","wikidata":"https://www.wikidata.org/wiki/Q80993","display_name":"Software engineering","level":1,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0},{"id":"https://openalex.org/C106131492","wikidata":"https://www.wikidata.org/wiki/Q3072260","display_name":"Filter (signal processing)","level":2,"score":0.0},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.0},{"id":"https://openalex.org/C50522688","wikidata":"https://www.wikidata.org/wiki/Q189833","display_name":"Economic growth","level":1,"score":0.0},{"id":"https://openalex.org/C153294291","wikidata":"https://www.wikidata.org/wiki/Q25261","display_name":"Meteorology","level":1,"score":0.0},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0}],"mesh":[],"locations_count":5,"locations":[{"id":"doi:10.1109/tetci.2022.3140375","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tetci.2022.3140375","pdf_url":null,"source":{"id":"https://openalex.org/S4210210251","display_name":"IEEE Transactions on Emerging Topics in Computational Intelligence","issn_l":"2471-285X","issn":["2471-285X"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Emerging Topics in Computational Intelligence","raw_type":"journal-article"},{"id":"pmh:oai:arXiv.org:2108.06911","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2108.06911","pdf_url":"https://arxiv.org/pdf/2108.06911","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":null},{"id":"mag:3194626037","is_oa":true,"landing_page_url":"https://arxiv.org/pdf/2108.06911v1","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"arXiv (Cornell University)","raw_type":null},{"id":"pmh:uon:48020","is_oa":false,"landing_page_url":"http://hdl.handle.net/1959.13/1468202","pdf_url":null,"source":{"id":"https://openalex.org/S4377196471","display_name":"NOVA (University of Newcastle Australia)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I78757542","host_organization_name":"University of Newcastle Australia","host_organization_lineage":["https://openalex.org/I78757542"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"journal article"},{"id":"doi:10.48550/arxiv.2108.06911","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2108.06911","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:2108.06911","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2108.06911","pdf_url":"https://arxiv.org/pdf/2108.06911","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":null},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":true,"grobid_xml":true},"content_urls":{"pdf":"https://content.openalex.org/works/W3194626037.pdf","grobid_xml":"https://content.openalex.org/works/W3194626037.grobid-xml"},"referenced_works_count":70,"referenced_works":["https://openalex.org/W32403112","https://openalex.org/W192920577","https://openalex.org/W1579853615","https://openalex.org/W1757796397","https://openalex.org/W1771410628","https://openalex.org/W1991119064","https://openalex.org/W2056370235","https://openalex.org/W2121863487","https://openalex.org/W2141559645","https://openalex.org/W2145339207","https://openalex.org/W2155968351","https://openalex.org/W2156347136","https://openalex.org/W2156737235","https://openalex.org/W2173248099","https://openalex.org/W2554984891","https://openalex.org/W2556958149","https://openalex.org/W2736601468","https://openalex.org/W2746553466","https://openalex.org/W2749928749","https://openalex.org/W2766447205","https://openalex.org/W2778749116","https://openalex.org/W2794487566","https://openalex.org/W2802164917","https://openalex.org/W2804816149","https://openalex.org/W2810602713","https://openalex.org/W2898585845","https://openalex.org/W2907537824","https://openalex.org/W2935588605","https://openalex.org/W2943810245","https://openalex.org/W2949608212","https://openalex.org/W2953981431","https://openalex.org/W2962793652","https://openalex.org/W2963484919","https://openalex.org/W2963923407","https://openalex.org/W2963946985","https://openalex.org/W2964161785","https://openalex.org/W2990346675","https://openalex.org/W3006388420","https://openalex.org/W3010145806","https://openalex.org/W3016525976","https://openalex.org/W3022566517","https://openalex.org/W3029509103","https://openalex.org/W3091086138","https://openalex.org/W3106449828","https://openalex.org/W3127756416","https://openalex.org/W4312922397","https://openalex.org/W6627932998","https://openalex.org/W6634817459","https://openalex.org/W6637967152","https://openalex.org/W6638018090","https://openalex.org/W6682849425","https://openalex.org/W6683001934","https://openalex.org/W6683195989","https://openalex.org/W6684921986","https://openalex.org/W6729507393","https://openalex.org/W6730111887","https://openalex.org/W6741002519","https://openalex.org/W6743802245","https://openalex.org/W6746622358","https://openalex.org/W6747473740","https://openalex.org/W6748839928","https://openalex.org/W6750138292","https://openalex.org/W6751892187","https://openalex.org/W6755903938","https://openalex.org/W6757380569","https://openalex.org/W6761920647","https://openalex.org/W6764976746","https://openalex.org/W6765121789","https://openalex.org/W6776438516","https://openalex.org/W6776601253"],"related_works":["https://openalex.org/W2941260551","https://openalex.org/W2949847538","https://openalex.org/W2994651240","https://openalex.org/W3118004751","https://openalex.org/W3205379033","https://openalex.org/W3006333061","https://openalex.org/W3167556547","https://openalex.org/W2944890708","https://openalex.org/W3046369681","https://openalex.org/W2901006014","https://openalex.org/W1557623003","https://openalex.org/W3105423414","https://openalex.org/W3210973725","https://openalex.org/W2981924372","https://openalex.org/W2997813960","https://openalex.org/W2988200242","https://openalex.org/W3096161972","https://openalex.org/W3111904893","https://openalex.org/W1591851573","https://openalex.org/W3008275213"],"abstract_inverted_index":{"Actor-critic(AC)":[0],"algorithms":[1,132],"are":[2],"known":[3],"for":[4],"their":[5],"efficacy":[6],"and":[7,31,39,50,54,109,154],"high":[8],"performance":[9,126],"in":[10,133],"solving":[11],"reinforcement":[12],"learning":[13,55],"problems,":[14],"but":[15],"they":[16],"also":[17],"suffer":[18],"from":[19,56,90],"low":[20],"sampling":[21,73,147],"efficiency.":[22],"An":[23],"AC":[24,92,131],"based":[25],"policy":[26,42,106,116],"optimization":[27,96],"process":[28],"is":[29,97,155],"iterative":[30],"needs":[32],"to":[33,37,66,79,128,152],"access":[34],"the":[35,41,46,81,91,120,144],"agent-environment":[36],"evaluate":[38],"update":[40],"by":[43,119],"rolling":[44],"out":[45],"policy,":[47],"collecting":[48],"rewards":[49],"states":[51],"(i.e.":[52],"samples),":[53],"them.":[57],"It":[58],"ultimately":[59],"requires":[60],"a":[61,77,100,105,110],"huge":[62],"number":[63],"of":[64,99],"samples":[65,88],"learn":[67],"an":[68],"optimal":[69,115],"policy.":[70],"To":[71],"improve":[72],"efficiency,":[74,148],"we":[75],"propose":[76],"strategy":[78],"optimize":[80],"training":[82,122],"dataset":[83,95,123],"that":[84,143],"contains":[85],"significantly":[86],"less":[87],"collected":[89],"process.":[93],"The":[94,114],"made":[98],"best":[101],"episode":[102],"only":[103],"operation,":[104],"parameter-fitness":[107],"model,":[108],"genetic":[111],"algorithm":[112],"module.":[113],"network":[117],"trained":[118],"optimized":[121],"exhibits":[124],"superior":[125],"compared":[127],"many":[129],"contemporary":[130],"controlling":[134],"autonomous":[135],"dynamical":[136],"systems.":[137],"Evaluation":[138],"on":[139],"standard":[140],"benchmarks":[141],"shows":[142],"method":[145],"improves":[146],"ensures":[149],"faster":[150],"convergence":[151],"optima,":[153],"more":[156],"data-efficient":[157],"than":[158],"its":[159],"counterparts.":[160]},"counts_by_year":[{"year":2023,"cited_by_count":1}],"updated_date":"2026-03-10T16:38:18.471706","created_date":"2025-10-10T00:00:00"}
