{"id":"https://openalex.org/W2896073909","doi":"https://doi.org/10.1109/ijcnn.2018.8489747","title":"AC2: A Policy Gradient Actor with Primary and Secondary Critics","display_name":"AC2: A Policy Gradient Actor with Primary and Secondary Critics","publication_year":2018,"publication_date":"2018-07-01","ids":{"openalex":"https://openalex.org/W2896073909","doi":"https://doi.org/10.1109/ijcnn.2018.8489747","mag":"2896073909"},"language":"en","primary_location":{"id":"doi:10.1109/ijcnn.2018.8489747","is_oa":false,"landing_page_url":"https://doi.org/10.1109/ijcnn.2018.8489747","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2018 International Joint Conference on Neural Networks (IJCNN)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5030904209","display_name":"Alfonso B. Labao","orcid":"https://orcid.org/0000-0001-5321-377X"},"institutions":[{"id":"https://openalex.org/I87074743","display_name":"University of the Philippines Diliman","ror":"https://ror.org/03tbh6y23","country_code":"PH","type":"education","lineage":["https://openalex.org/I103911934","https://openalex.org/I87074743"]}],"countries":["PH"],"is_corresponding":true,"raw_author_name":"Alfonso B. Labao","raw_affiliation_strings":["Computer Vision & Machine Intelligence Group, University of the Philippines Diliman"],"affiliations":[{"raw_affiliation_string":"Computer Vision & Machine Intelligence Group, University of the Philippines Diliman","institution_ids":["https://openalex.org/I87074743"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5005869214","display_name":"Prospero C. Naval","orcid":"https://orcid.org/0000-0001-7140-1707"},"institutions":[{"id":"https://openalex.org/I87074743","display_name":"University of the Philippines Diliman","ror":"https://ror.org/03tbh6y23","country_code":"PH","type":"education","lineage":["https://openalex.org/I103911934","https://openalex.org/I87074743"]}],"countries":["PH"],"is_corresponding":false,"raw_author_name":"Prospero C. Naval","raw_affiliation_strings":["Computer Vision & Machine Intelligence Group, University of the Philippines Diliman"],"affiliations":[{"raw_affiliation_string":"Computer Vision & Machine Intelligence Group, University of the Philippines Diliman","institution_ids":["https://openalex.org/I87074743"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5030904209"],"corresponding_institution_ids":["https://openalex.org/I87074743"],"apc_list":null,"apc_paid":null,"fwci":0.1629,"has_fulltext":false,"cited_by_count":1,"citation_normalized_percentile":{"value":0.59363306,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":94},"biblio":{"volume":"521","issue":null,"first_page":"1","last_page":"8"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9986000061035156,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9986000061035156,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11689","display_name":"Adversarial Robustness in Machine Learning","score":0.9926999807357788,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12101","display_name":"Advanced Bandit Algorithms Research","score":0.9693999886512756,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/variance","display_name":"Variance (accounting)","score":0.6990622282028198},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.606170654296875},{"id":"https://openalex.org/keywords/percentile","display_name":"Percentile","score":0.5781763195991516},{"id":"https://openalex.org/keywords/scheme","display_name":"Scheme (mathematics)","score":0.468935489654541},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.46034055948257446},{"id":"https://openalex.org/keywords/series","display_name":"Series (stratigraphy)","score":0.4491290748119354},{"id":"https://openalex.org/keywords/train","display_name":"Train","score":0.4483812153339386},{"id":"https://openalex.org/keywords/sampling","display_name":"Sampling (signal processing)","score":0.4234221875667572},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.41156670451164246},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.2669617235660553},{"id":"https://openalex.org/keywords/statistics","display_name":"Statistics","score":0.24892717599868774},{"id":"https://openalex.org/keywords/telecommunications","display_name":"Telecommunications","score":0.09692716598510742}],"concepts":[{"id":"https://openalex.org/C196083921","wikidata":"https://www.wikidata.org/wiki/Q7915758","display_name":"Variance (accounting)","level":2,"score":0.6990622282028198},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.606170654296875},{"id":"https://openalex.org/C122048520","wikidata":"https://www.wikidata.org/wiki/Q2913954","display_name":"Percentile","level":2,"score":0.5781763195991516},{"id":"https://openalex.org/C77618280","wikidata":"https://www.wikidata.org/wiki/Q1155772","display_name":"Scheme (mathematics)","level":2,"score":0.468935489654541},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.46034055948257446},{"id":"https://openalex.org/C143724316","wikidata":"https://www.wikidata.org/wiki/Q312468","display_name":"Series (stratigraphy)","level":2,"score":0.4491290748119354},{"id":"https://openalex.org/C190839683","wikidata":"https://www.wikidata.org/wiki/Q2448197","display_name":"Train","level":2,"score":0.4483812153339386},{"id":"https://openalex.org/C140779682","wikidata":"https://www.wikidata.org/wiki/Q210868","display_name":"Sampling (signal processing)","level":3,"score":0.4234221875667572},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.41156670451164246},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.2669617235660553},{"id":"https://openalex.org/C105795698","wikidata":"https://www.wikidata.org/wiki/Q12483","display_name":"Statistics","level":1,"score":0.24892717599868774},{"id":"https://openalex.org/C76155785","wikidata":"https://www.wikidata.org/wiki/Q418","display_name":"Telecommunications","level":1,"score":0.09692716598510742},{"id":"https://openalex.org/C121955636","wikidata":"https://www.wikidata.org/wiki/Q4116214","display_name":"Accounting","level":1,"score":0.0},{"id":"https://openalex.org/C94915269","wikidata":"https://www.wikidata.org/wiki/Q1834857","display_name":"Detector","level":2,"score":0.0},{"id":"https://openalex.org/C144133560","wikidata":"https://www.wikidata.org/wiki/Q4830453","display_name":"Business","level":0,"score":0.0},{"id":"https://openalex.org/C134306372","wikidata":"https://www.wikidata.org/wiki/Q7754","display_name":"Mathematical analysis","level":1,"score":0.0},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0},{"id":"https://openalex.org/C151730666","wikidata":"https://www.wikidata.org/wiki/Q7205","display_name":"Paleontology","level":1,"score":0.0},{"id":"https://openalex.org/C205649164","wikidata":"https://www.wikidata.org/wiki/Q1071","display_name":"Geography","level":0,"score":0.0},{"id":"https://openalex.org/C58640448","wikidata":"https://www.wikidata.org/wiki/Q42515","display_name":"Cartography","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/ijcnn.2018.8489747","is_oa":false,"landing_page_url":"https://doi.org/10.1109/ijcnn.2018.8489747","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2018 International Joint Conference on Neural Networks (IJCNN)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":35,"referenced_works":["https://openalex.org/W1191599655","https://openalex.org/W2091656757","https://openalex.org/W2094387729","https://openalex.org/W2121863487","https://openalex.org/W2132317897","https://openalex.org/W2145339207","https://openalex.org/W2155968351","https://openalex.org/W2156737235","https://openalex.org/W2173564293","https://openalex.org/W2554984891","https://openalex.org/W2596758708","https://openalex.org/W2604268026","https://openalex.org/W2746553466","https://openalex.org/W2761873684","https://openalex.org/W2767029636","https://openalex.org/W2782108898","https://openalex.org/W2919115771","https://openalex.org/W2950492145","https://openalex.org/W2951799221","https://openalex.org/W2953364219","https://openalex.org/W2963423916","https://openalex.org/W2963959137","https://openalex.org/W2964043796","https://openalex.org/W4214717370","https://openalex.org/W4298876402","https://openalex.org/W6627932998","https://openalex.org/W6683195989","https://openalex.org/W6683300800","https://openalex.org/W6685444567","https://openalex.org/W6692846177","https://openalex.org/W6729507393","https://openalex.org/W6735506055","https://openalex.org/W6736309856","https://openalex.org/W6744838376","https://openalex.org/W6747359475"],"related_works":["https://openalex.org/W618248309","https://openalex.org/W2377336366","https://openalex.org/W1550559433","https://openalex.org/W2189235034","https://openalex.org/W1568097102","https://openalex.org/W4390419160","https://openalex.org/W1601203902","https://openalex.org/W2075798043","https://openalex.org/W4225671779","https://openalex.org/W2102464536"],"abstract_inverted_index":{"We":[0,24,62,88],"propose":[1],"AC2,":[2],"a":[3,9,12,36,93,124],"policy":[4,22],"gradient":[5,82],"algorithm":[6,65,91,133],"that":[7,30,46,69,75],"employs":[8],"primary":[10],"and":[11,19,28,73,98,141],"secondary":[13,37,120],"critic":[14,38,67,106,121,140],"to":[15],"manage":[16],"both":[17],"bias":[18],"variance":[20],"in":[21,50,84,92,109],"gradients.":[23],"present":[25],"through":[26],"analyses":[27],"experiments":[29,97],"performance":[31,101,114,137],"becomes":[32],"more":[33,71,85,105,116],"stable":[34,117],"if":[35,118],"concentrates":[39],"on":[40,123],"few":[41,125],"problematic":[42,126],"states":[43,127],"(upper":[44],"95-percentile)":[45],"cause":[47],"extreme":[48],"changes":[49],"value":[51],"estimates.":[52],"This":[53],"scheme":[54],"can":[55],"keep":[56],"biases":[57],"tolerable":[58],"while":[59],"lowering":[60],"variances.":[61],"relate":[63],"our":[64,90],"with":[66,104],"ensembles":[68,103],"have":[70],"components":[72,107],"show":[74],"ensemble":[76],"averaging":[77],"may":[78],"not":[79],"significantly":[80],"reduce":[81],"variances":[83],"difficult":[86],"environments.":[87,111],"test":[89],"series":[94],"of":[95],"high-dimensional":[96],"report":[99],"better":[100,135],"than":[102,128,138],"especially":[108],"harder":[110],"In":[112],"addition,":[113],"is":[115],"the":[119],"trains":[122],"by":[129],"random":[130],"sampling.":[131],"Our":[132],"reports":[134],"reward":[136],"single":[139],"other":[142],"RL":[143],"models.":[144]},"counts_by_year":[{"year":2020,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
